From f6002f6139f4db5a6ee36a4d422b8189b143ce46 Mon Sep 17 00:00:00 2001 From: Max Moroz Date: Fri, 17 Jan 2020 06:24:15 -0800 Subject: [infra] Collect dataflow traces on the builder (#1632). (#3238) * [infra] Skeleton of the changes needed for collecting DFT on the builder (#1632). * move ENGINE_INFO to the helper as well * make collect_dft +x * syntax fixes * add actual dataflow tracer script * format * more refactoring and cleanup * format * address Oliver's feedback * format * more fixes * format * do not redirect stderr to stdout * add exit at the end of main * address feedback from Oliver --- infra/gcb/build_and_run_coverage.py | 67 +++--------------- infra/gcb/build_lib.py | 134 ++++++++++++++++++++++++++++++++++++ infra/gcb/build_project.py | 116 +++++++++++-------------------- infra/gcb/cancel.py | 6 +- 4 files changed, 186 insertions(+), 137 deletions(-) create mode 100644 infra/gcb/build_lib.py (limited to 'infra/gcb') diff --git a/infra/gcb/build_and_run_coverage.py b/infra/gcb/build_and_run_coverage.py index f6b08666..d7599ba7 100644 --- a/infra/gcb/build_and_run_coverage.py +++ b/infra/gcb/build_and_run_coverage.py @@ -11,29 +11,20 @@ import requests import sys import urlparse +import build_lib import build_project SANITIZER = 'coverage' CONFIGURATION = ['FUZZING_ENGINE=libfuzzer', 'SANITIZER=%s' % SANITIZER] PLATFORM = 'linux' -# Where corpus backups can be downloaded from. -CORPUS_BACKUP_URL = ('/{project}-backup.clusterfuzz-external.appspot.com/' - 'corpus/libFuzzer/{fuzzer}/latest.zip') - -# Cloud Builder has a limit of 100 build steps and 100 arguments for each step. -CORPUS_DOWNLOAD_BATCH_SIZE = 100 - COVERAGE_BUILD_TAG = 'coverage' -# Needed for reading public target.list.* files. -GCS_URL_BASENAME = 'https://storage.googleapis.com/' - # Where code coverage reports need to be uploaded to. COVERAGE_BUCKET_NAME = 'oss-fuzz-coverage' # Link to the code coverage report in HTML format. -HTML_REPORT_URL_FORMAT = (GCS_URL_BASENAME + COVERAGE_BUCKET_NAME + +HTML_REPORT_URL_FORMAT = (build_lib.GCS_URL_BASENAME + COVERAGE_BUCKET_NAME + '/{project}/reports/{date}/{platform}/index.html') # This is needed for ClusterFuzz to pick up the most recent reports data. @@ -74,10 +65,6 @@ def get_build_steps(project_dir): skip_build('Project "%s" uses go-fuzz, coverage is not supported yet.' % project_name) - fuzz_targets = get_targets_list(project_name) - if not fuzz_targets: - skip_build('No fuzz targets found for project "%s".' % project_name) - dockerfile_path = os.path.join(project_dir, 'Dockerfile') name = project_yaml['name'] image = project_yaml['image'] @@ -143,32 +130,11 @@ def get_build_steps(project_dir): ], }) - # Split fuzz targets into batches of CORPUS_DOWNLOAD_BATCH_SIZE. - for i in xrange(0, len(fuzz_targets), CORPUS_DOWNLOAD_BATCH_SIZE): - download_corpus_args = [] - for binary_name in fuzz_targets[i:i + CORPUS_DOWNLOAD_BATCH_SIZE]: - qualified_name = binary_name - qualified_name_prefix = '%s_' % project_name - if not binary_name.startswith(qualified_name_prefix): - qualified_name = qualified_name_prefix + binary_name - - url = build_project.get_signed_url(CORPUS_BACKUP_URL.format( - project=project_name, fuzzer=qualified_name), - method='GET') - - corpus_archive_path = os.path.join('/corpus', binary_name + '.zip') - download_corpus_args.append('%s %s' % (corpus_archive_path, url)) - - # Download corpus. - build_steps.append({ - 'name': 'gcr.io/oss-fuzz-base/base-runner', - 'entrypoint': 'download_corpus', - 'args': download_corpus_args, - 'volumes': [{ - 'name': 'corpus', - 'path': '/corpus' - }], - }) + download_corpora_step = build_lib.download_corpora_step(project_name) + if not download_corpora_step: + skip_build("Skipping code coverage build for %s.\n" % project_name) + + build_steps.append(download_corpora_step) failure_msg = ('*' * 80 + '\nCode coverage report generation failed.\n' 'To reproduce, run:\n' @@ -267,7 +233,7 @@ def get_build_steps(project_dir): }) # Update the latest report information file for ClusterFuzz. - latest_report_info_url = build_project.get_signed_url( + latest_report_info_url = build_lib.get_signed_url( LATEST_REPORT_INFO_URL.format(project=project_name), method='PUT', content_type='application/json') @@ -300,23 +266,6 @@ def get_build_steps(project_dir): return build_steps -def get_targets_list(project_name): - # libFuzzer ASan is the default configuration, get list of targets from it. - url = build_project.get_targets_list_url( - build_project.ENGINE_INFO['libfuzzer'].upload_bucket, project_name, - 'address') - - url = urlparse.urljoin(GCS_URL_BASENAME, url) - r = requests.get(url) - if not r.status_code == 200: - sys.stderr.write('Failed to get list of targets from "%s".\n' % url) - sys.stderr.write('Status code: %d \t\tText:\n%s\n' % - (r.status_code, r.text)) - return None - - return r.text.split() - - def main(): if len(sys.argv) != 2: usage() diff --git a/infra/gcb/build_lib.py b/infra/gcb/build_lib.py new file mode 100644 index 00000000..d3508730 --- /dev/null +++ b/infra/gcb/build_lib.py @@ -0,0 +1,134 @@ +"""Utility module for Google Cloud Build scripts.""" +import base64 +import collections +import os +import requests +import sys +import time +import urllib +import urlparse + +from oauth2client.service_account import ServiceAccountCredentials + +BUILD_TIMEOUT = 12 * 60 * 60 + +# Needed for reading public target.list.* files. +GCS_URL_BASENAME = 'https://storage.googleapis.com/' + +GCS_UPLOAD_URL_FORMAT = '/{0}/{1}/{2}' + +# Where corpus backups can be downloaded from. +CORPUS_BACKUP_URL = ('/{project}-backup.clusterfuzz-external.appspot.com/' + 'corpus/libFuzzer/{fuzzer}/latest.zip') + +# Cloud Builder has a limit of 100 build steps and 100 arguments for each step. +CORPUS_DOWNLOAD_BATCH_SIZE = 100 + +TARGETS_LIST_BASENAME = 'targets.list' + +EngineInfo = collections.namedtuple( + 'EngineInfo', + ['upload_bucket', 'supported_sanitizers', 'supported_architectures']) + +ENGINE_INFO = { + 'libfuzzer': + EngineInfo(upload_bucket='clusterfuzz-builds', + supported_sanitizers=['address', 'memory', 'undefined'], + supported_architectures=['x86_64', 'i386']), + 'afl': + EngineInfo(upload_bucket='clusterfuzz-builds-afl', + supported_sanitizers=['address'], + supported_architectures=['x86_64']), + 'honggfuzz': + EngineInfo(upload_bucket='clusterfuzz-builds-honggfuzz', + supported_sanitizers=['address', 'memory', 'undefined'], + supported_architectures=['x86_64']), + 'dataflow': + EngineInfo(upload_bucket='clusterfuzz-builds-dataflow', + supported_sanitizers=['dataflow'], + supported_architectures=['x86_64']), + 'none': + EngineInfo(upload_bucket='clusterfuzz-builds-no-engine', + supported_sanitizers=['address'], + supported_architectures=['x86_64']), +} + + +def get_targets_list_filename(sanitizer): + return TARGETS_LIST_BASENAME + '.' + sanitizer + + +def get_targets_list_url(bucket, project, sanitizer): + filename = get_targets_list_filename(sanitizer) + url = GCS_UPLOAD_URL_FORMAT.format(bucket, project, filename) + return url + + +def _get_targets_list(project_name): + # libFuzzer ASan is the default configuration, get list of targets from it. + url = get_targets_list_url(ENGINE_INFO['libfuzzer'].upload_bucket, + project_name, 'address') + + url = urlparse.urljoin(GCS_URL_BASENAME, url) + response = requests.get(url) + if not response.status_code == 200: + sys.stderr.write('Failed to get list of targets from "%s".\n' % url) + sys.stderr.write('Status code: %d \t\tText:\n%s\n' % + (response.status_code, response.text)) + return None + + return response.text.split() + + +def get_signed_url(path, method='PUT', content_type=''): + timestamp = int(time.time() + BUILD_TIMEOUT) + blob = '{0}\n\n{1}\n{2}\n{3}'.format(method, content_type, timestamp, path) + + creds = ServiceAccountCredentials.from_json_keyfile_name( + os.environ['GOOGLE_APPLICATION_CREDENTIALS']) + client_id = creds.service_account_email + signature = base64.b64encode(creds.sign_blob(blob)[1]) + values = { + 'GoogleAccessId': client_id, + 'Expires': timestamp, + 'Signature': signature, + } + + return ('https://storage.googleapis.com{0}?'.format(path) + + urllib.urlencode(values)) + + +def download_corpora_step(project_name): + """Returns a GCB step for downloading corpora backups for the given project. + """ + fuzz_targets = _get_targets_list(project_name) + if not fuzz_targets: + sys.stderr.write('No fuzz targets found for project "%s".\n' % project_name) + return None + + # Split fuzz targets into batches of CORPUS_DOWNLOAD_BATCH_SIZE. + for i in range(0, len(fuzz_targets), CORPUS_DOWNLOAD_BATCH_SIZE): + download_corpus_args = [] + for binary_name in fuzz_targets[i:i + CORPUS_DOWNLOAD_BATCH_SIZE]: + qualified_name = binary_name + qualified_name_prefix = '%s_' % project_name + if not binary_name.startswith(qualified_name_prefix): + qualified_name = qualified_name_prefix + binary_name + + url = get_signed_url(CORPUS_BACKUP_URL.format(project=project_name, + fuzzer=qualified_name), + method='GET') + + corpus_archive_path = os.path.join('/corpus', binary_name + '.zip') + download_corpus_args.append('%s %s' % (corpus_archive_path, url)) + + step = { + 'name': 'gcr.io/oss-fuzz-base/base-runner', + 'entrypoint': 'download_corpus', + 'args': download_corpus_args, + 'volumes': [{ + 'name': 'corpus', + 'path': '/corpus' + }], + } + return step diff --git a/infra/gcb/build_project.py b/infra/gcb/build_project.py index 0b640542..642fce21 100644 --- a/infra/gcb/build_project.py +++ b/infra/gcb/build_project.py @@ -6,22 +6,17 @@ Usage: build_project.py from __future__ import print_function -import base64 -import collections import datetime import json import os import re import sys -import time -import urllib import yaml from oauth2client.client import GoogleCredentials -from oauth2client.service_account import ServiceAccountCredentials from googleapiclient.discovery import build -BUILD_TIMEOUT = 12 * 60 * 60 +import build_lib FUZZING_BUILD_TAG = 'fuzzing' @@ -39,41 +34,10 @@ CONFIGURATIONS = { 'engine-none': ['FUZZING_ENGINE=none'], } -EngineInfo = collections.namedtuple( - 'EngineInfo', - ['upload_bucket', 'supported_sanitizers', 'supported_architectures']) - -ENGINE_INFO = { - 'libfuzzer': - EngineInfo(upload_bucket='clusterfuzz-builds', - supported_sanitizers=['address', 'memory', 'undefined'], - supported_architectures=['x86_64', 'i386']), - 'afl': - EngineInfo(upload_bucket='clusterfuzz-builds-afl', - supported_sanitizers=['address'], - supported_architectures=['x86_64']), - 'honggfuzz': - EngineInfo(upload_bucket='clusterfuzz-builds-honggfuzz', - supported_sanitizers=['address', 'memory', 'undefined'], - supported_architectures=['x86_64']), - 'dataflow': - EngineInfo(upload_bucket='clusterfuzz-builds-dataflow', - supported_sanitizers=['dataflow'], - supported_architectures=['x86_64']), - 'none': - EngineInfo(upload_bucket='clusterfuzz-builds-no-engine', - supported_sanitizers=['address'], - supported_architectures=['x86_64']), -} - DEFAULT_ARCHITECTURES = ['x86_64'] DEFAULT_ENGINES = ['libfuzzer', 'afl', 'honggfuzz'] DEFAULT_SANITIZERS = ['address', 'undefined'] -TARGETS_LIST_BASENAME = 'targets.list' - -UPLOAD_URL_FORMAT = '/{0}/{1}/{2}' - def usage(): sys.stderr.write('Usage: ' + sys.argv[0] + ' \n') @@ -97,26 +61,8 @@ def load_project_yaml(project_dir): return project_yaml -def get_signed_url(path, method='PUT', content_type=''): - timestamp = int(time.time() + BUILD_TIMEOUT) - blob = '{0}\n\n{1}\n{2}\n{3}'.format(method, content_type, timestamp, path) - - creds = ServiceAccountCredentials.from_json_keyfile_name( - os.environ['GOOGLE_APPLICATION_CREDENTIALS']) - client_id = creds.service_account_email - signature = base64.b64encode(creds.sign_blob(blob)[1]) - values = { - 'GoogleAccessId': client_id, - 'Expires': timestamp, - 'Signature': signature, - } - - return ('https://storage.googleapis.com{0}?'.format(path) + - urllib.urlencode(values)) - - def is_supported_configuration(fuzzing_engine, sanitizer, architecture): - fuzzing_engine_info = ENGINE_INFO[fuzzing_engine] + fuzzing_engine_info = build_lib.ENGINE_INFO[fuzzing_engine] if architecture == 'i386' and sanitizer != 'address': return False return (sanitizer in fuzzing_engine_info.supported_sanitizers and @@ -213,17 +159,18 @@ def get_build_steps(project_dir): stamped_name = '-'.join([name, sanitizer, ts]) zip_file = stamped_name + '.zip' stamped_srcmap_file = stamped_name + '.srcmap.json' - bucket = ENGINE_INFO[fuzzing_engine].upload_bucket + bucket = build_lib.ENGINE_INFO[fuzzing_engine].upload_bucket if architecture != 'x86_64': bucket += '-' + architecture - upload_url = get_signed_url( - UPLOAD_URL_FORMAT.format(bucket, name, zip_file)) - srcmap_url = get_signed_url( - UPLOAD_URL_FORMAT.format(bucket, name, stamped_srcmap_file)) + upload_url = build_lib.get_signed_url( + build_lib.GCS_UPLOAD_URL_FORMAT.format(bucket, name, zip_file)) + srcmap_url = build_lib.get_signed_url( + build_lib.GCS_UPLOAD_URL_FORMAT.format(bucket, name, + stamped_srcmap_file)) - targets_list_filename = get_targets_list_filename(sanitizer) - targets_list_url = get_signed_url( - get_targets_list_url(bucket, name, sanitizer)) + targets_list_filename = build_lib.get_targets_list_filename(sanitizer) + targets_list_url = build_lib.get_signed_url( + build_lib.get_targets_list_url(bucket, name, sanitizer)) env.append('OUT=' + out) env.append('MSAN_LIBS_PATH=/workspace/msan') @@ -320,6 +267,13 @@ def get_build_steps(project_dir): ], }) + if sanitizer == 'dataflow' and fuzzing_engine == 'dataflow': + dataflow_steps = dataflow_post_build_steps(name) + if dataflow_steps: + build_steps.extend(dataflow_steps) + else: + sys.stderr.write('Skipping dataflow post build steps.\n') + build_steps.extend([ # generate targets list { @@ -383,22 +337,34 @@ def get_build_steps(project_dir): return build_steps +def dataflow_post_build_steps(project_name): + steps = [] + download_corpora_step = build_lib.download_corpora_step(project_name) + if not download_corpora_step: + return None + + steps = [download_corpora_step] + steps.append({ + 'name': 'gcr.io/oss-fuzz-base/base-runner', + 'args': [ + 'bash', '-c', + ('for f in /corpus/*.zip; do unzip -q $f -d ${f%%.*}; done && ' + 'collect_dft || (echo "DFT collection failed." && false)') + ], + 'volumes': [{ + 'name': 'corpus', + 'path': '/corpus' + }], + }) + return steps + + def get_logs_url(build_id): URL_FORMAT = ('https://console.developers.google.com/logs/viewer?' 'resource=build%2Fbuild_id%2F{0}&project=oss-fuzz') return URL_FORMAT.format(build_id) -def get_targets_list_filename(sanitizer): - return TARGETS_LIST_BASENAME + '.' + sanitizer - - -def get_targets_list_url(bucket, project, sanitizer): - filename = get_targets_list_filename(sanitizer) - url = UPLOAD_URL_FORMAT.format(bucket, project, filename) - return url - - def run_build(build_steps, project_name, tag): options = {} if 'GCB_OPTIONS' in os.environ: @@ -406,7 +372,7 @@ def run_build(build_steps, project_name, tag): build_body = { 'steps': build_steps, - 'timeout': str(BUILD_TIMEOUT) + 's', + 'timeout': str(build_lib.BUILD_TIMEOUT) + 's', 'options': options, 'logsBucket': GCB_LOGS_BUCKET, 'tags': [project_name + '-' + tag,], diff --git a/infra/gcb/cancel.py b/infra/gcb/cancel.py index 331244fe..8393a514 100755 --- a/infra/gcb/cancel.py +++ b/infra/gcb/cancel.py @@ -15,7 +15,6 @@ import urllib import yaml from oauth2client.client import GoogleCredentials -from oauth2client.service_account import ServiceAccountCredentials from googleapiclient.discovery import build @@ -32,8 +31,9 @@ def main(): credentials = GoogleCredentials.get_application_default() cloudbuild = build('cloudbuild', 'v1', credentials=credentials) - print cloudbuild.projects().builds().cancel( - projectId='oss-fuzz', id=build_id, body={}).execute() + print cloudbuild.projects().builds().cancel(projectId='oss-fuzz', + id=build_id, + body={}).execute() if __name__ == '__main__': -- cgit v1.2.3