Diffstat (limited to 'infra/bots/recipe_modules/skia/resources/run_binary_size_analysis.py')
-rwxr-xr-x | infra/bots/recipe_modules/skia/resources/run_binary_size_analysis.py | 820 |
1 files changed, 820 insertions, 0 deletions
diff --git a/infra/bots/recipe_modules/skia/resources/run_binary_size_analysis.py b/infra/bots/recipe_modules/skia/resources/run_binary_size_analysis.py
new file mode 100755
index 0000000000..f8c3c830ad
--- /dev/null
+++ b/infra/bots/recipe_modules/skia/resources/run_binary_size_analysis.py
@@ -0,0 +1,820 @@
+#!/usr/bin/env python
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Generate a spatial analysis against an arbitrary library.
+
+Adapted for Skia's use case from
+chromium/src/tools/binary_size/run_binary_size_analysis.py. Main changes:
+
+-- Cleans up some deprecated code.
+-- Always uses relative code paths so the tree root is the Skia repo's root.
+-- Instead of outputting the standalone HTML/CSS/JS filesets, writes the
+   TreeMap JSON data into a Google Storage bucket.
+-- Adds githash and total_size to the JSON data.
+-- Outputs summary data in JSON Bench format for skiaperf ingestion.
+
+The output JSON data for visualization is in the following format:
+
+{
+  "githash": 123abc,
+  "commit_ts": 1234567890,
+  "total_size": 1234567,
+  "key": {"source_type": "binary_size"},
+  "tree_data": {
+    "maxDepth": 9,
+    "k": "p", "children": [
+      {"k": "p", "children": [
+        {"k": "p", "children": [
+          {"k": "p", "lastPathElement": true, "children": [
+            {"k": "b", "t": "t", "children": [
+              {"k": "s", "t": "t", "value": 4029,
+               "n": "etc_encode_subblock_helper(unsigned char const*, ...)"
+              },
+              ......
+  }
+}
+
+Another JSON file is generated for size summaries to be used in skiaperf. The
+JSON format details can be found at:
+  https://github.com/google/skia/blob/master/bench/ResultsWriter.h#L54
+and:
+  https://skia.googlesource.com/buildbot/+/master/perf/go/ingester/nanobench.go
+
+In the binary size case, outputs look like:
+
+{
+  "gitHash": "123abc",
+  "key": {
+    "source_type": "binarysize"
+  },
+  "results": {
+    "src_lazy_global_weak_symbol": {
+      "memory": {
+        "bytes": 41,
+        "options": {
+          "path": "src_lazy",
+          "symbol": "global_weak_symbol"
+        }
+      }
+    },
+    "src_lazy_global_read_only_data": {
+      "memory": {
+        "bytes": 13476,
+        "options": {
+          "path": "src_lazy",
+          "symbol": "global_read_only_data"
+        }
+      }
+    },
+    ...
+  }
+}
+
+"""
+
+import collections
+import datetime
+import json
+import logging
+import multiprocessing
+import optparse
+import os
+import re
+import shutil
+import struct
+import subprocess
+import sys
+import tempfile
+import time
+import urllib2
+
+import binary_size_utils
+import elf_symbolizer
+
+from recipe_engine.types import freeze
+
+# Skia addition
+from common.skia import global_constants
+
+# Node dictionary keys. These are output in json read by the webapp so
+# keep them short to save file size.
+# Note: If these change, the webapp must also change.
+NODE_TYPE_KEY = 'k'
+NODE_NAME_KEY = 'n'
+NODE_CHILDREN_KEY = 'children'
+NODE_SYMBOL_TYPE_KEY = 't'
+NODE_SYMBOL_SIZE_KEY = 'value'
+NODE_MAX_DEPTH_KEY = 'maxDepth'
+NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'
+
+# The display name of the bucket where we put symbols without path.
+NAME_NO_PATH_BUCKET = '(No Path)'
+
+# Try to keep data buckets smaller than this to avoid killing the
+# graphing lib.
+BIG_BUCKET_LIMIT = 3000
+
+# Skia addition: relative dir for libskia.so from code base.
+LIBSKIA_RELATIVE_PATH = os.path.join('out', 'Release', 'lib')
+
+# Skia addition: dictionary mapping symbol type code to symbol name.
+# See
+# https://code.google.com/p/chromium/codesearch#chromium/src/tools/binary_size/template/D3SymbolTreeMap.js&l=74
+SYMBOL_MAP = freeze({
+    'A': 'global_absolute',
+    'B': 'global_uninitialized_data',
+    'b': 'local_uninitialized_data',
+    'C': 'global_uninitialized_common',
+    'D': 'global_initialized_data',
+    'd': 'local_initialized_data',
+    'G': 'global_small_initialized_data',
+    'g': 'local_small_initialized_data',
+    'i': 'indirect_function',
+    'N': 'debugging',
+    'p': 'stack_unwind',
+    'R': 'global_read_only_data',
+    'r': 'local_read_only_data',
+    'S': 'global_small_uninitialized_data',
+    's': 'local_small_uninitialized_data',
+    'T': 'global_code',
+    't': 'local_code',
+    'U': 'undefined',
+    'u': 'unique',
+    'V': 'global_weak_object',
+    'v': 'local_weak_object',
+    'W': 'global_weak_symbol',
+    'w': 'local_weak_symbol',
+    '@': 'vtable_entry',
+    '-': 'stabs_debugging',
+    '?': 'unrecognized',
+})
+
+
+def _MkChild(node, name):
+  child = node[NODE_CHILDREN_KEY].get(name)
+  if child is None:
+    child = {NODE_NAME_KEY: name,
+             NODE_CHILDREN_KEY: {}}
+    node[NODE_CHILDREN_KEY][name] = child
+  return child
+
+
+def SplitNoPathBucket(node):
+  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
+  handle. Split it into sub-buckets in that case."""
+  root_children = node[NODE_CHILDREN_KEY]
+  if NAME_NO_PATH_BUCKET in root_children:
+    no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
+    old_children = no_path_bucket[NODE_CHILDREN_KEY]
+    count = 0
+    for symbol_type, symbol_bucket in old_children.iteritems():
+      count += len(symbol_bucket[NODE_CHILDREN_KEY])
+    if count > BIG_BUCKET_LIMIT:
+      new_children = {}
+      no_path_bucket[NODE_CHILDREN_KEY] = new_children
+      current_bucket = None
+      index = 0
+      for symbol_type, symbol_bucket in old_children.iteritems():
+        for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].iteritems():
+          if index % BIG_BUCKET_LIMIT == 0:
+            group_no = (index / BIG_BUCKET_LIMIT) + 1
+            current_bucket = _MkChild(no_path_bucket,
+                                      '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
+                                                          group_no))
+            assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
+            node[NODE_TYPE_KEY] = 'p'  # p for path
+          index += 1
+          symbol_size = value[NODE_SYMBOL_SIZE_KEY]
+          AddSymbolIntoFileNode(current_bucket, symbol_type,
+                                symbol_name, symbol_size)
+
+
+def MakeChildrenDictsIntoLists(node):
+  largest_list_len = 0
+  if NODE_CHILDREN_KEY in node:
+    largest_list_len = len(node[NODE_CHILDREN_KEY])
+    child_list = []
+    for child in node[NODE_CHILDREN_KEY].itervalues():
+      child_largest_list_len = MakeChildrenDictsIntoLists(child)
+      if child_largest_list_len > largest_list_len:
+        largest_list_len = child_largest_list_len
+      child_list.append(child)
+    node[NODE_CHILDREN_KEY] = child_list
+
+  return largest_list_len
+
+
+def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
+  """Puts symbol into the file path node |node|.
+  Returns the number of levels added to the tree, which is always 2."""
+
+  # 'node' is the file node and first step is to find its symbol-type bucket.
+  node[NODE_LAST_PATH_ELEMENT_KEY] = True
+  node = _MkChild(node, symbol_type)
+  assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b'
+  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
+  node[NODE_TYPE_KEY] = 'b'  # b for bucket
+
+  # 'node' is now the symbol-type bucket. Make the child entry.
+  node = _MkChild(node, symbol_name)
+  if NODE_CHILDREN_KEY in node:
+    if node[NODE_CHILDREN_KEY]:
+      logging.warning('A container node used as symbol for %s.' % symbol_name)
+    # This is going to be used as a leaf so no use for child list.
+    del node[NODE_CHILDREN_KEY]
+  node[NODE_SYMBOL_SIZE_KEY] = symbol_size
+  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
+  node[NODE_TYPE_KEY] = 's'  # s for symbol
+
+  return 2  # Depth of the added subtree.
+
+
+def MakeCompactTree(symbols, symbol_path_origin_dir):
+  result = {NODE_NAME_KEY: '/',
+            NODE_CHILDREN_KEY: {},
+            NODE_TYPE_KEY: 'p',
+            NODE_MAX_DEPTH_KEY: 0}
+  seen_symbol_with_path = False
+  for symbol_name, symbol_type, symbol_size, file_path in symbols:
+
+    if 'vtable for ' in symbol_name:
+      symbol_type = '@'  # hack to categorize these separately
+    if file_path and file_path != "??":
+      seen_symbol_with_path = True
+    else:
+      file_path = NAME_NO_PATH_BUCKET
+
+    path_parts = file_path.split('/')
+
+    # Find pre-existing node in tree, or update if it already exists
+    node = result
+    depth = 0
+    while len(path_parts) > 0:
+      path_part = path_parts.pop(0)
+      if len(path_part) == 0:
+        continue
+      depth += 1
+      node = _MkChild(node, path_part)
+      assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
+      node[NODE_TYPE_KEY] = 'p'  # p for path
+
+    depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
+    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)
+
+  if not seen_symbol_with_path:
+    logging.warning('Symbols lack paths. Data will not be structured.')
+
+  # The (no path) bucket can be extremely large if we failed to get
+  # path information. Split it into subgroups if needed.
+  SplitNoPathBucket(result)
+
+  largest_list_len = MakeChildrenDictsIntoLists(result)
+
+  if largest_list_len > BIG_BUCKET_LIMIT:
+    logging.warning('There are sections with %d nodes. '
+                    'Results might be unusable.' % largest_list_len)
+  return result
+
+
+# Skia added: summarizes tree size by symbol type for the given root node.
+# Returns a dict keyed by symbol type, and value the type's overall size.
+# e.g., {"t": 12345, "W": 543}.
+def GetTreeSizes(node):
+  if 'children' not in node or not node['children']:
+    return {node['t']: node['value']}
+  dic = {}
+  for i in node['children']:
+    for k, v in GetTreeSizes(i).items():
+      dic.setdefault(k, 0)
+      dic[k] += v
+
+  return dic
+
+
+# Skia added: creates dict to be converted to JSON in bench format.
+# See top of file for the structure description.
+def GetBenchDict(githash, tree_root):
+  dic = {'gitHash': githash,
+         'key': {'source_type': 'binarysize'},
+         'results': {},}
+  for i in tree_root['children']:
+    if '(No Path)' == i['n']:  # Already at symbol summary level.
+      for k, v in GetTreeSizes(i).items():
+        dic['results']['no_path_' + SYMBOL_MAP[k]] = {
+            'memory': {
+                'bytes': v,
+                'options': {'path': 'no_path',
+                            'symbol': SYMBOL_MAP[k],},}}
+    else:  # We need to go deeper.
+      for c in i['children']:
+        path = i['n'] + '_' + c['n']
+        for k, v in GetTreeSizes(c).items():
+          dic['results'][path + '_' + SYMBOL_MAP[k]] = {
+              'memory': {
+                  'bytes': v,
+                  'options': {'path': path,
+                              'symbol': SYMBOL_MAP[k],}}}
+
+  return dic
+
+
+# Skia added: constructs 'gsutil cp' subprocess command list.
+def GetGsCopyCommandList(gsutil, src, dst):
+  return [gsutil, '-h', 'Content-Type:application/json', 'cp', '-a',
+          'public-read', src, dst]
+
+
+def DumpCompactTree(symbols, symbol_path_origin_dir, ha, ts, issue, gsutil):
+  tree_root = MakeCompactTree(symbols, symbol_path_origin_dir)
+  json_data = {'tree_data': tree_root,
+               'githash': ha,
+               'commit_ts': ts,
+               'key': {'source_type': 'binary_size'},
+               'total_size': sum(GetTreeSizes(tree_root).values()),}
+  tmpfile = tempfile.NamedTemporaryFile(delete=False).name
+  with open(tmpfile, 'w') as out:
+    # Use separators without whitespace to get a smaller file.
+    json.dump(json_data, out, separators=(',', ':'))
+
+  GS_PREFIX = 'gs://' + global_constants.GS_GM_BUCKET + '/'
+  # Writes to Google Storage for visualization.
+  subprocess.check_call(GetGsCopyCommandList(
+      gsutil, tmpfile, GS_PREFIX + 'size/' + ha + '.json'))
+  # Updates the latest data.
+  if not issue:
+    subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile,
+                                               GS_PREFIX + 'size/latest.json'))
+  # Writes an extra copy using year/month/day/hour path for easy ingestion.
+  with open(tmpfile, 'w') as out:
+    json.dump(GetBenchDict(ha, tree_root), out, separators=(',', ':'))
+  now = datetime.datetime.utcnow()
+  ingest_path = '/'.join(('nano-json-v1', str(now.year).zfill(4),
+                          str(now.month).zfill(2), str(now.day).zfill(2),
+                          str(now.hour).zfill(2)))
+  if issue:
+    ingest_path = '/'.join(('trybot', ingest_path, issue))
+  subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile,
+      GS_PREFIX + ingest_path + '/binarysize_' + ha + '.json'))
+
+
+def MakeSourceMap(symbols):
+  sources = {}
+  for _sym, _symbol_type, size, path in symbols:
+    key = None
+    if path:
+      key = os.path.normpath(path)
+    else:
+      key = '[no path]'
+    if key not in sources:
+      sources[key] = {'path': path, 'symbol_count': 0, 'size': 0}
+    record = sources[key]
+    record['size'] += size
+    record['symbol_count'] += 1
+  return sources
+
+
+# Regex for parsing "nm" output. A sample line looks like this:
+# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
+#
+# The fields are: address, size, type, name, source location.
+# Regular expression explained ( see also: https://xkcd.com/208 ):
+# ([0-9a-f]{8,})   The address
+# [\s]+            Whitespace separator
+# ([0-9a-f]{8,})   The size. From here on out it's all optional.
+# [\s]*            Whitespace separator
+# (\S?)            The symbol type, which is any non-whitespace char
+# [\s*]            Whitespace separator
+# ([^\t]*)         Symbol name, any non-tab character (spaces ok!)
+# [\t]?            Tab separator
+# (.*)             The location (filename[:linenum|?][ (discriminator n)])
+sNmPattern = re.compile(
+    r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
+
+
+class Progress():
+  def __init__(self):
+    self.count = 0
+    self.skip_count = 0
+    self.collisions = 0
+    self.time_last_output = time.time()
+    self.count_last_output = 0
+    self.disambiguations = 0
+    self.was_ambiguous = 0
+
+
+def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
+                     disambiguate, src_path):
+  nm_output = RunNm(library, nm_binary)
+  nm_output_lines = nm_output.splitlines()
+  nm_output_lines_len = len(nm_output_lines)
+  address_symbol = {}
+  progress = Progress()
+
+  def map_address_symbol(symbol, addr):
+    progress.count += 1
+    if addr in address_symbol:
+      # 'Collision between %s and %s.' % (str(symbol.name),
+      #                                   str(address_symbol[addr].name))
+      progress.collisions += 1
+    else:
+      if symbol.disambiguated:
+        progress.disambiguations += 1
+      if symbol.was_ambiguous:
+        progress.was_ambiguous += 1
+
+      address_symbol[addr] = symbol
+
+    progress_output()
+
+  def progress_output():
+    progress_chunk = 100
+    if progress.count % progress_chunk == 0:
+      time_now = time.time()
+      time_spent = time_now - progress.time_last_output
+      if time_spent > 1.0:
+        # Only output at most once per second.
+        progress.time_last_output = time_now
+        chunk_size = progress.count - progress.count_last_output
+        progress.count_last_output = progress.count
+        if time_spent > 0:
+          speed = chunk_size / time_spent
+        else:
+          speed = 0
+        progress_percent = (100.0 * (progress.count + progress.skip_count) /
+                            nm_output_lines_len)
+        disambiguation_percent = 0
+        if progress.disambiguations != 0:
+          disambiguation_percent = (100.0 * progress.disambiguations /
+                                    progress.was_ambiguous)
+
+        sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, '
+                         '%d disambiguations where %.1f%% succeeded)'
+                         ' - %.1f lookups/s.' %
+                         (progress_percent, progress.count,
+                          progress.collisions, progress.disambiguations,
+                          disambiguation_percent, speed))
+
+  # In case disambiguation was disabled, we remove the source path (which upon
+  # being set signals the symbolizer to enable disambiguation).
+  if not disambiguate:
+    src_path = None
+  symbol_path_origin_dir = os.path.dirname(library)
+  # Skia specific.
+  symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '')
+  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
+                                            map_address_symbol,
+                                            max_concurrent_jobs=jobs,
+                                            source_root_path=src_path,
+                                            prefix_to_remove=symbol_path_prefix)
+  user_interrupted = False
+  try:
+    for line in nm_output_lines:
+      match = sNmPattern.match(line)
+      if match:
+        location = match.group(5)
+        if not location:
+          addr = int(match.group(1), 16)
+          size = int(match.group(2), 16)
+          if addr in address_symbol:
+            # Already looked up, shortcut ELFSymbolizer.
+            map_address_symbol(address_symbol[addr], addr)
+            continue
+          elif size == 0:
+            # Save time by not looking up empty symbols (do they even exist?)
+            print('Empty symbol: ' + line)
+          else:
+            symbolizer.SymbolizeAsync(addr, addr)
+            continue
+
+      progress.skip_count += 1
+  except KeyboardInterrupt:
+    user_interrupted = True
+    print('Interrupting - killing subprocesses. Please wait.')
+
+  try:
+    symbolizer.Join()
+  except KeyboardInterrupt:
+    # Don't want to abort here since we will be finished in a few seconds.
+    user_interrupted = True
+    print('Patience you must have my young padawan.')
+
+  print ''
+
+  if user_interrupted:
+    print('Skipping the rest of the file mapping. '
+          'Output will not be fully classified.')
+
+  symbol_path_origin_dir = os.path.dirname(library)
+  # Skia specific: path prefix to strip.
+  symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '')
+
+  with open(outfile, 'w') as out:
+    for line in nm_output_lines:
+      match = sNmPattern.match(line)
+      if match:
+        location = match.group(5)
+        if not location:
+          addr = int(match.group(1), 16)
+          symbol = address_symbol.get(addr)
+          if symbol is not None:
+            path = '??'
+            if symbol.source_path is not None:
+              path = symbol.source_path.replace(symbol_path_prefix, '')
+            line_number = 0
+            if symbol.source_line is not None:
+              line_number = symbol.source_line
+            out.write('%s\t%s:%d\n' % (line, path, line_number))
+            continue
+
+      out.write('%s\n' % line)
+
+  print('%d symbols in the results.' % len(address_symbol))
+
+
+def RunNm(binary, nm_binary):
+  cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
+         binary]
+  nm_process = subprocess.Popen(cmd,
+                                stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE)
+  (process_output, err_output) = nm_process.communicate()
+
+  if nm_process.returncode != 0:
+    if err_output:
+      raise Exception(err_output)
+    else:
+      raise Exception(process_output)
+
+  return process_output
+
+
+def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
+                 addr2line_binary, nm_binary, disambiguate, src_path):
+  if nm_infile is None:
+    if outfile is None:
+      outfile = tempfile.NamedTemporaryFile(delete=False).name
+
+    if verbose:
+      print 'Running parallel addr2line, dumping symbols to ' + outfile
+    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
+                     disambiguate, src_path)
+
+    nm_infile = outfile
+
+  elif verbose:
+    print 'Using nm input from ' + nm_infile
+  with open(nm_infile, 'r') as infile:
+    return list(binary_size_utils.ParseNm(infile))
+
+
+PAK_RESOURCE_ID_TO_STRING = {"inited": False}
+
+
+def LoadPakIdsFromResourceFile(filename):
+  """Given a file name, it loads everything that looks like a resource id
+  into PAK_RESOURCE_ID_TO_STRING."""
+  with open(filename) as resource_header:
+    for line in resource_header:
+      if line.startswith("#define "):
+        line_data = line.split()
+        if len(line_data) == 3:
+          try:
+            resource_number = int(line_data[2])
+            resource_name = line_data[1]
+            PAK_RESOURCE_ID_TO_STRING[resource_number] = resource_name
+          except ValueError:
+            pass
+
+
+def GetReadablePakResourceName(pak_file, resource_id):
+  """Pak resources have a numeric identifier. It is not helpful when
+  trying to locate where footprint is generated. This does its best to
+  map the number to a usable string."""
+  if not PAK_RESOURCE_ID_TO_STRING['inited']:
+    # Try to find resource header files generated by grit when
+    # building the pak file. We'll look for files named *resources.h
+    # and lines of the type:
+    #   #define MY_RESOURCE_JS 1234
+    PAK_RESOURCE_ID_TO_STRING['inited'] = True
+    gen_dir = os.path.join(os.path.dirname(pak_file), 'gen')
+    if os.path.isdir(gen_dir):
+      for dirname, _dirs, files in os.walk(gen_dir):
+        for filename in files:
+          if filename.endswith('resources.h'):
+            LoadPakIdsFromResourceFile(os.path.join(dirname, filename))
+  return PAK_RESOURCE_ID_TO_STRING.get(resource_id,
+                                       'Pak Resource %d' % resource_id)
+
+
+def AddPakData(symbols, pak_file):
+  """Adds pseudo-symbols from a pak file."""
+  pak_file = os.path.abspath(pak_file)
+  with open(pak_file, 'rb') as pak:
+    data = pak.read()
+
+  PAK_FILE_VERSION = 4
+  HEADER_LENGTH = 2 * 4 + 1  # Two uint32s (file version, number of entries)
+                             # and one uint8 (encoding of text resources).
+  INDEX_ENTRY_SIZE = 2 + 4  # Each entry is a uint16 and a uint32.
+  version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
+  assert version == PAK_FILE_VERSION, ('Unsupported pak file '
+                                       'version (%d) in %s. Only '
+                                       'support version %d' %
+                                       (version, pak_file, PAK_FILE_VERSION))
+  if num_entries > 0:
+    # Read the index and data.
+    data = data[HEADER_LENGTH:]
+    for _ in range(num_entries):
+      resource_id, offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
+      data = data[INDEX_ENTRY_SIZE:]
+      _next_id, next_offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
+      resource_size = next_offset - offset
+
+      symbol_name = GetReadablePakResourceName(pak_file, resource_id)
+      symbol_path = pak_file
+      symbol_type = 'd'  # Data. Approximation.
+      symbol_size = resource_size
+      symbols.append((symbol_name, symbol_type, symbol_size, symbol_path))
+
+
+def _find_in_system_path(binary):
+  """Locate the full path to binary in the system path or return None
+  if not found."""
+  system_path = os.environ["PATH"].split(os.pathsep)
+  for path in system_path:
+    binary_path = os.path.join(path, binary)
+    if os.path.isfile(binary_path):
+      return binary_path
+  return None
+
+
+def CheckDebugFormatSupport(library, addr2line_binary):
+  """Kills the program if debug data is in an unsupported format.
+
+  There are two common versions of the DWARF debug formats and
+  since we are right now transitioning from DWARF2 to newer formats,
+  it's possible to have a mix of tools that are not compatible. Detect
+  that and abort rather than produce meaningless output."""
+  tool_output = subprocess.check_output([addr2line_binary, '--version'])
+  version_re = re.compile(r'^GNU [^ ]+ .* (\d+).(\d+).*?$', re.M)
+  parsed_output = version_re.match(tool_output)
+  major = int(parsed_output.group(1))
+  minor = int(parsed_output.group(2))
+  supports_dwarf4 = major > 2 or major == 2 and minor > 22
+
+  if supports_dwarf4:
+    return
+
+  print('Checking version of debug information in %s.' % library)
+  debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
+                                        '--dwarf-depth=1', library])
+  dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
+  parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
+  version = int(parsed_dwarf_format_output.group(1))
+  if version > 2:
+    print('The supplied tools only support DWARF2 debug data but the binary\n' +
+          'uses DWARF%d. Update the tools or compile the binary\n' % version +
+          'with -gdwarf-2.')
+    sys.exit(1)
+
+
+def main():
+  usage = """%prog [options]
+
+  Runs a spatial analysis on a given library, looking up the source locations
+  of its symbols and calculating how much space each directory, source file,
+  and so on is taking. The result is a report that can be used to pinpoint
+  sources of large portions of the binary, et cetera.
+
+  Under normal circumstances, you only need to pass two arguments, e.g.:
+
+  %prog --library /path/to/library --destdir /path/to/output
+
+  In this mode, the program will dump the symbols from the specified library
+  and map those symbols back to source locations, producing a web-based
+  report in the specified output directory.
+
+  Other options are available via '--help'.
+  """
+  parser = optparse.OptionParser(usage=usage)
+  parser.add_option('--nm-in', metavar='PATH',
+                    help='if specified, use nm input from <path> instead of '
+                    'generating it. Note that source locations should be '
+                    'present in the file; i.e., no addr2line symbol lookups '
+                    'will be performed when this option is specified. '
+                    'Mutually exclusive with --library.')
+  parser.add_option('--destdir', metavar='PATH',
+                    help='write output to the specified directory. An HTML '
+                    'report is generated here along with supporting files; '
+                    'any existing report will be overwritten. Not used in '
+                    'Skia.')
+  parser.add_option('--library', metavar='PATH',
+                    help='if specified, process symbols in the library at '
+                    'the specified path. Mutually exclusive with --nm-in.')
+  parser.add_option('--pak', metavar='PATH',
+                    help='if specified, includes the contents of the '
+                    'specified *.pak file in the output.')
+  parser.add_option('--nm-binary',
+                    help='use the specified nm binary to analyze library. '
+                    'This is to be used when the nm in the path is not for '
+                    'the right architecture or of the right version.')
+  parser.add_option('--addr2line-binary',
+                    help='use the specified addr2line binary to analyze '
+                    'library. This is to be used when the addr2line in '
+                    'the path is not for the right architecture or '
+                    'of the right version.')
+  parser.add_option('--jobs', type='int',
+                    help='number of jobs to use for the parallel '
+                    'addr2line processing pool; defaults to 1. More '
+                    'jobs greatly improve throughput but eat RAM like '
+                    'popcorn, and take several gigabytes each. Start low '
+                    'and ramp this number up until your machine begins to '
+                    'struggle with RAM. '
+                    'This argument is only valid when using --library.')
+  parser.add_option('-v', dest='verbose', action='store_true',
+                    help='be verbose, printing lots of status information.')
+  parser.add_option('--nm-out', metavar='PATH',
+                    help='keep the nm output file, and store it at the '
+                    'specified path. This is useful if you want to see the '
+                    'fully processed nm output after the symbols have been '
+                    'mapped to source locations. By default, a tempfile is '
+                    'used and is deleted when the program terminates. '
+                    'This argument is only valid when using --library.')
+  parser.add_option('--legacy', action='store_true',
+                    help='emit legacy binary size report instead of modern')
+  parser.add_option('--disable-disambiguation', action='store_true',
+                    help='disables the disambiguation process altogether,'
+                    ' NOTE: this may, depending on your toolchain, produce'
+                    ' output with some symbols at the top layer if addr2line'
+                    ' could not get the entire source path.')
+  parser.add_option('--source-path', default='./',
+                    help='the path to the source code of the output binary, '
+                    'default set to current directory. Used in the'
+                    ' disambiguation process.')
+  parser.add_option('--githash', default='latest',
+                    help='Git hash for the binary version. Added by Skia.')
+  parser.add_option('--commit_ts', type='int', default=-1,
+                    help='Timestamp for the commit. Added by Skia.')
+  parser.add_option('--issue_number', default='',
+                    help='The trybot issue number in string. Added by Skia.')
+  parser.add_option('--gsutil_path', default='gsutil',
+                    help='Path to gsutil binary. Added by Skia.')
+  opts, _args = parser.parse_args()
+
+  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
+    parser.error('exactly one of --library or --nm-in is required')
+  if opts.nm_in:
+    if opts.jobs:
+      print >> sys.stderr, ('WARNING: --jobs has no effect '
+                            'when used with --nm-in')
+  if not opts.jobs:
+    # Use the number of processors but cap between 2 and 4 since raw
+    # CPU power isn't the limiting factor. It's I/O limited, memory
+    # bus limited and available-memory-limited. Too many processes and
+    # the computer will run out of memory and it will be slow.
+    opts.jobs = max(2, min(4, multiprocessing.cpu_count()))
+
+  if opts.addr2line_binary:
+    assert os.path.isfile(opts.addr2line_binary)
+    addr2line_binary = opts.addr2line_binary
+  else:
+    addr2line_binary = _find_in_system_path('addr2line')
+    assert addr2line_binary, 'Unable to find addr2line in the path. '\
+        'Use --addr2line-binary to specify location.'
+
+  if opts.nm_binary:
+    assert os.path.isfile(opts.nm_binary)
+    nm_binary = opts.nm_binary
+  else:
+    nm_binary = _find_in_system_path('nm')
+    assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
+        'to specify location.'
+
+  if opts.pak:
+    assert os.path.isfile(opts.pak), 'Could not find %s' % opts.pak
+
+  print('addr2line: %s' % addr2line_binary)
+  print('nm: %s' % nm_binary)
+
+  if opts.library:
+    CheckDebugFormatSupport(opts.library, addr2line_binary)
+
+  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
+                         opts.jobs, opts.verbose is True,
+                         addr2line_binary, nm_binary,
+                         opts.disable_disambiguation is None,
+                         opts.source_path)
+
+  if opts.pak:
+    AddPakData(symbols, opts.pak)
+
+  if opts.legacy:  # legacy report
+    print 'Do not set the legacy flag.'
+
+  else:  # modern report
+    if opts.library:
+      symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library))
+    else:
+      # Just a guess. Hopefully all paths in the input file are absolute.
+      symbol_path_origin_dir = os.path.abspath(os.getcwd())
+    DumpCompactTree(symbols, symbol_path_origin_dir, opts.githash,
+                    opts.commit_ts, opts.issue_number, opts.gsutil_path)
+    print 'Report data uploaded to GS.'
+
+
+if __name__ == '__main__':
+  sys.exit(main())
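
For illustration, here is a minimal, self-contained sketch (not part of the patch) of the compact-tree node format and of how per-symbol-type totals are folded up. The tiny tree is hand-built and hypothetical; tree_sizes mirrors the logic of the patch's GetTreeSizes.

  # Node keys mirror the constants in the script: 'k' (node type),
  # 'n' (name), 't' (symbol type), 'value' (size), 'children'.
  def tree_sizes(node):
    # A leaf contributes {symbol_type: size}; an inner node sums its
    # children's totals per symbol type (same logic as GetTreeSizes).
    if not node.get('children'):
      return {node['t']: node['value']}
    totals = {}
    for child in node['children']:
      for sym_type, size in tree_sizes(child).items():
        totals[sym_type] = totals.get(sym_type, 0) + size
    return totals

  # Hypothetical tree: one path node holding a local-code ('t') bucket
  # and a global-weak-symbol ('W') bucket.
  example = {
      'k': 'p', 'n': 'src',
      'children': [
          {'k': 'b', 'n': 't', 't': 't', 'children': [
              {'k': 's', 'n': 'foo()', 't': 't', 'value': 4029},
              {'k': 's', 'n': 'bar()', 't': 't', 'value': 100},
          ]},
          {'k': 'b', 'n': 'W', 't': 'W', 'children': [
              {'k': 's', 'n': 'baz', 't': 'W', 'value': 41},
          ]},
      ],
  }

  print(tree_sizes(example))  # -> {'t': 4129, 'W': 41}

GetBenchDict then keys each such total as path_symbolname (e.g. src_lazy_global_weak_symbol via SYMBOL_MAP), which is where the bench JSON entries shown in the module docstring come from.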
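Likewise, a quick self-contained check of the sNmPattern regex, run against a constructed line (the sample values are made up) in the tab-separated form that RunElfSymbolizer itself writes:

  import re

  # Same pattern as sNmPattern in the patch.
  nm_pattern = re.compile(
      r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')

  # Constructed sample: address, size, type, name, then a tab-separated
  # source location (the '%s\t%s:%d' form written by the script).
  line = '0167b39c 00000018 t ACCESS_DESCRIPTION_free\t/path/file.c:95'
  match = nm_pattern.match(line)
  assert match is not None
  print(match.groups())
  # -> ('0167b39c', '00000018', 't', 'ACCESS_DESCRIPTION_free',
  #     '/path/file.c:95')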