author | 2016-08-03 08:23:10 -0700
---|---
committer | 2016-08-03 08:23:10 -0700
commit | 1436a09e1fc3be4655af960d4ffb31066bfe4cdd (patch)
tree | e0f37f3938b2bf1dcd7a432943343839e994714c /infra/bots/recipe_modules/core/resources
parent | 7a9f3766aa07f69323f37670a7aeb13605f266bb (diff)
Re-organize Skia recipes
Break Skia recipe module into:
- skia_vars: defines and stores variables (e.g. paths)
- skia_step: utilities for running Skia steps
- skia_flavor: flavor-specific stuff
- skia: top-level setup, checkout, test/perf steps, etc.
This establishes a saner dependency structure for the recipes: skia_vars
sits at the bottom; skia_step depends on it; skia_flavor depends on both
of them; skia depends on all of the above; and the recipes themselves may
depend on any or all of them.
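Concretely, a recipe picks these layers up through its DEPS list. A hypothetical sketch (the module names are from this change; the file path and exact list are illustrative):

```python
# infra/bots/recipes/swarm_test.py (illustrative path)
DEPS = [
  'skia_vars',    # bottom layer: paths and other shared variables
  'skia_step',    # step-running utilities; depends on skia_vars
  'skia_flavor',  # flavor-specific logic; depends on both of the above
  'skia',         # top-level setup, checkout, and test/perf steps
]
```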
Next steps:
- Merge buildbot_spec into skia_vars
- Move test_steps and perf_steps from skia recipe_module into
swarm_test and swarm_perf recipes
- Cleaner checkout_steps process
BUG=skia:5578
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2198173002
Review-Url: https://codereview.chromium.org/2198173002
Diffstat (limited to 'infra/bots/recipe_modules/core/resources')
6 files changed, 1609 insertions, 0 deletions
diff --git a/infra/bots/recipe_modules/core/resources/binary_size_utils.py b/infra/bots/recipe_modules/core/resources/binary_size_utils.py new file mode 100644 index 0000000000..c09a65dccd --- /dev/null +++ b/infra/bots/recipe_modules/core/resources/binary_size_utils.py @@ -0,0 +1,67 @@ +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Common utilities for tools that deal with binary size information. + +Copied from chromium/src/build/android/pylib/symbols/binary_size_tools.py. +""" + +import logging +import re + + +def ParseNm(nm_lines): + """Parse nm output, returning data for all relevant (to binary size) + symbols and ignoring the rest. + + Args: + nm_lines: an iterable over lines of nm output. + + Yields: + (symbol name, symbol type, symbol size, source file path). + + Path may be None if nm couldn't figure out the source file. + """ + + # Match lines with size, symbol, optional location, optional discriminator + sym_re = re.compile(r'^[0-9a-f]{8,} ' # address (8+ hex digits) + '([0-9a-f]{8,}) ' # size (8+ hex digits) + '(.) ' # symbol type, one character + '([^\t]+)' # symbol name, separated from next by tab + '(?:\t(.*):[\d\?]+)?.*$') # location + # Match lines with addr but no size. + addr_re = re.compile(r'^[0-9a-f]{8,} (.) ([^\t]+)(?:\t.*)?$') + # Match lines that don't have an address at all -- typically external symbols. + noaddr_re = re.compile(r'^ {8,} (.) (.*)$') + # Match lines with no symbol name, only addr and type + addr_only_re = re.compile(r'^[0-9a-f]{8,} (.)$') + + for line in nm_lines: + line = line.rstrip() + match = sym_re.match(line) + if match: + size, sym_type, sym = match.groups()[0:3] + size = int(size, 16) + if sym_type in ('B', 'b'): + continue # skip all BSS for now. + path = match.group(4) + yield sym, sym_type, size, path + continue + match = addr_re.match(line) + if match: + # sym_type, sym = match.groups()[0:2] + continue # No size == we don't care. + match = noaddr_re.match(line) + if match: + sym_type, sym = match.groups() + if sym_type in ('U', 'w'): + continue # external or weak symbol + match = addr_only_re.match(line) + if match: + continue # Nothing to do. + + + # If we reach this part of the loop, there was something in the + # line that we didn't expect or recognize. + logging.warning('nm output parser failed to parse: %s', repr(line)) diff --git a/infra/bots/recipe_modules/core/resources/elf_symbolizer.py b/infra/bots/recipe_modules/core/resources/elf_symbolizer.py new file mode 100644 index 0000000000..de9c141219 --- /dev/null +++ b/infra/bots/recipe_modules/core/resources/elf_symbolizer.py @@ -0,0 +1,477 @@ +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""The ElfSymbolizer class for symbolizing Executable and Linkable Files. + +Adapted for Skia's use from +chromium/src/build/android/pylib/symbols/elf_symbolizer.py. + +Main changes: +-- Added prefix_to_remove param to remove path prefix from tree data. +""" + +import collections +import datetime +import logging +import multiprocessing +import os +import posixpath +import Queue +import re +import subprocess +import sys +import threading + + +# addr2line builds a possibly infinite memory cache that can exhaust +# the computer's memory if allowed to grow for too long. This constant +# controls how many lookups we do before restarting the process. 
4000 +# gives near peak performance without extreme memory usage. +ADDR2LINE_RECYCLE_LIMIT = 4000 + + +class ELFSymbolizer(object): + """An uber-fast (multiprocessing, pipelined and asynchronous) ELF symbolizer. + + This class is a frontend for addr2line (part of GNU binutils), designed to + symbolize batches of large numbers of symbols for a given ELF file. It + supports sharding symbolization against many addr2line instances and + pipelining of multiple requests per each instance (in order to hide addr2line + internals and OS pipe latencies). + + The interface exhibited by this class is a very simple asynchronous interface, + which is based on the following three methods: + - SymbolizeAsync(): used to request (enqueue) resolution of a given address. + - The |callback| method: used to communicated back the symbol information. + - Join(): called to conclude the batch to gather the last outstanding results. + In essence, before the Join method returns, this class will have issued as + many callbacks as the number of SymbolizeAsync() calls. In this regard, note + that due to multiprocess sharding, callbacks can be delivered out of order. + + Some background about addr2line: + - it is invoked passing the elf path in the cmdline, piping the addresses in + its stdin and getting results on its stdout. + - it has pretty large response times for the first requests, but it + works very well in streaming mode once it has been warmed up. + - it doesn't scale by itself (on more cores). However, spawning multiple + instances at the same time on the same file is pretty efficient as they + keep hitting the pagecache and become mostly CPU bound. + - it might hang or crash, mostly for OOM. This class deals with both of these + problems. + + Despite the "scary" imports and the multi* words above, (almost) no multi- + threading/processing is involved from the python viewpoint. Concurrency + here is achieved by spawning several addr2line subprocesses and handling their + output pipes asynchronously. Therefore, all the code here (with the exception + of the Queue instance in Addr2Line) should be free from mind-blowing + thread-safety concerns. + + The multiprocess sharding works as follows: + The symbolizer tries to use the lowest number of addr2line instances as + possible (with respect of |max_concurrent_jobs|) and enqueue all the requests + in a single addr2line instance. For few symbols (i.e. dozens) sharding isn't + worth the startup cost. + The multiprocess logic kicks in as soon as the queues for the existing + instances grow. Specifically, once all the existing instances reach the + |max_queue_size| bound, a new addr2line instance is kicked in. + In the case of a very eager producer (i.e. all |max_concurrent_jobs| instances + have a backlog of |max_queue_size|), back-pressure is applied on the caller by + blocking the SymbolizeAsync method. + + This module has been deliberately designed to be dependency free (w.r.t. of + other modules in this project), to allow easy reuse in external projects. + """ + + def __init__(self, elf_file_path, addr2line_path, callback, inlines=False, + max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50, + source_root_path=None, strip_base_path=None, prefix_to_remove=None): + """Args: + elf_file_path: path of the elf file to be symbolized. + addr2line_path: path of the toolchain's addr2line binary. + callback: a callback which will be invoked for each resolved symbol with + the two args (sym_info, callback_arg). 
The former is an instance of + |ELFSymbolInfo| and contains the symbol information. The latter is an + embedder-provided argument which is passed to SymbolizeAsync(). + inlines: when True, the ELFSymbolInfo will contain also the details about + the outer inlining functions. When False, only the innermost function + will be provided. + max_concurrent_jobs: Max number of addr2line instances spawned. + Parallelize responsibly, addr2line is a memory and I/O monster. + max_queue_size: Max number of outstanding requests per addr2line instance. + addr2line_timeout: Max time (in seconds) to wait for a addr2line response. + After the timeout, the instance will be considered hung and respawned. + source_root_path: In some toolchains only the name of the source file is + is output, without any path information; disambiguation searches + through the source directory specified by |source_root_path| argument + for files whose name matches, adding the full path information to the + output. For example, if the toolchain outputs "unicode.cc" and there + is a file called "unicode.cc" located under |source_root_path|/foo, + the tool will replace "unicode.cc" with + "|source_root_path|/foo/unicode.cc". If there are multiple files with + the same name, disambiguation will fail because the tool cannot + determine which of the files was the source of the symbol. + strip_base_path: Rebases the symbols source paths onto |source_root_path| + (i.e replace |strip_base_path| with |source_root_path). + prefix_to_remove: Removes the prefix from ElfSymbolInfo output. Skia added + """ + assert(os.path.isfile(addr2line_path)), 'Cannot find ' + addr2line_path + self.elf_file_path = elf_file_path + self.addr2line_path = addr2line_path + self.callback = callback + self.inlines = inlines + self.max_concurrent_jobs = (max_concurrent_jobs or + min(multiprocessing.cpu_count(), 4)) + self.max_queue_size = max_queue_size + self.addr2line_timeout = addr2line_timeout + self.requests_counter = 0 # For generating monotonic request IDs. + self._a2l_instances = [] # Up to |max_concurrent_jobs| _Addr2Line inst. + + # Skia addition: remove the given prefix from tree paths. + self.prefix_to_remove = prefix_to_remove + + # If necessary, create disambiguation lookup table + self.disambiguate = source_root_path is not None + self.disambiguation_table = {} + self.strip_base_path = strip_base_path + if(self.disambiguate): + self.source_root_path = os.path.abspath(source_root_path) + self._CreateDisambiguationTable() + + # Create one addr2line instance. More instances will be created on demand + # (up to |max_concurrent_jobs|) depending on the rate of the requests. + self._CreateNewA2LInstance() + + def SymbolizeAsync(self, addr, callback_arg=None): + """Requests symbolization of a given address. + + This method is not guaranteed to return immediately. It generally does, but + in some scenarios (e.g. all addr2line instances have full queues) it can + block to create back-pressure. + + Args: + addr: address to symbolize. + callback_arg: optional argument which will be passed to the |callback|.""" + assert(isinstance(addr, int)) + + # Process all the symbols that have been resolved in the meanwhile. + # Essentially, this drains all the addr2line(s) out queues. + for a2l_to_purge in self._a2l_instances: + a2l_to_purge.ProcessAllResolvedSymbolsInQueue() + a2l_to_purge.RecycleIfNecessary() + + # Find the best instance according to this logic: + # 1. Find an existing instance with the shortest queue. + # 2. 
If all of instances' queues are full, but there is room in the pool, + # (i.e. < |max_concurrent_jobs|) create a new instance. + # 3. If there were already |max_concurrent_jobs| instances and all of them + # had full queues, make back-pressure. + + # 1. + def _SortByQueueSizeAndReqID(a2l): + return (a2l.queue_size, a2l.first_request_id) + a2l = min(self._a2l_instances, key=_SortByQueueSizeAndReqID) + + # 2. + if (a2l.queue_size >= self.max_queue_size and + len(self._a2l_instances) < self.max_concurrent_jobs): + a2l = self._CreateNewA2LInstance() + + # 3. + if a2l.queue_size >= self.max_queue_size: + a2l.WaitForNextSymbolInQueue() + + a2l.EnqueueRequest(addr, callback_arg) + + def Join(self): + """Waits for all the outstanding requests to complete and terminates.""" + for a2l in self._a2l_instances: + a2l.WaitForIdle() + a2l.Terminate() + + def _CreateNewA2LInstance(self): + assert(len(self._a2l_instances) < self.max_concurrent_jobs) + a2l = ELFSymbolizer.Addr2Line(self) + self._a2l_instances.append(a2l) + return a2l + + def _CreateDisambiguationTable(self): + """ Non-unique file names will result in None entries""" + self.disambiguation_table = {} + + for root, _, filenames in os.walk(self.source_root_path): + for f in filenames: + self.disambiguation_table[f] = os.path.join(root, f) if (f not in + self.disambiguation_table) else None + + + class Addr2Line(object): + """A python wrapper around an addr2line instance. + + The communication with the addr2line process looks as follows: + [STDIN] [STDOUT] (from addr2line's viewpoint) + > f001111 + > f002222 + < Symbol::Name(foo, bar) for f001111 + < /path/to/source/file.c:line_number + > f003333 + < Symbol::Name2() for f002222 + < /path/to/source/file.c:line_number + < Symbol::Name3() for f003333 + < /path/to/source/file.c:line_number + """ + + SYM_ADDR_RE = re.compile(r'([^:]+):(\?|\d+).*') + + def __init__(self, symbolizer): + self._symbolizer = symbolizer + self._lib_file_name = posixpath.basename(symbolizer.elf_file_path) + + # The request queue (i.e. addresses pushed to addr2line's stdin and not + # yet retrieved on stdout) + self._request_queue = collections.deque() + + # This is essentially len(self._request_queue). It has been optimized to a + # separate field because turned out to be a perf hot-spot. + self.queue_size = 0 + + # Keep track of the number of symbols a process has processed to + # avoid a single process growing too big and using all the memory. + self._processed_symbols_count = 0 + + # Objects required to handle the addr2line subprocess. + self._proc = None # Subprocess.Popen(...) instance. + self._thread = None # Threading.thread instance. + self._out_queue = None # Queue.Queue instance (for buffering a2l stdout). + self._RestartAddr2LineProcess() + + def EnqueueRequest(self, addr, callback_arg): + """Pushes an address to addr2line's stdin (and keeps track of it).""" + self._symbolizer.requests_counter += 1 # For global "age" of requests. + req_idx = self._symbolizer.requests_counter + self._request_queue.append((addr, callback_arg, req_idx)) + self.queue_size += 1 + self._WriteToA2lStdin(addr) + + def WaitForIdle(self): + """Waits until all the pending requests have been symbolized.""" + while self.queue_size > 0: + self.WaitForNextSymbolInQueue() + + def WaitForNextSymbolInQueue(self): + """Waits for the next pending request to be symbolized.""" + if not self.queue_size: + return + + # This outer loop guards against a2l hanging (detecting stdout timeout). 
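+      # In both the hang and the crash case the process is respawned, and
+      # _RestartAddr2LineProcess replays any still-pending requests on the
+      # new instance, so no enqueued address is lost.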
+ while True: + start_time = datetime.datetime.now() + timeout = datetime.timedelta(seconds=self._symbolizer.addr2line_timeout) + + # The inner loop guards against a2l crashing (checking if it exited). + while (datetime.datetime.now() - start_time < timeout): + # poll() returns !None if the process exited. a2l should never exit. + if self._proc.poll(): + logging.warning('addr2line crashed, respawning (lib: %s).' % + self._lib_file_name) + self._RestartAddr2LineProcess() + # TODO(primiano): the best thing to do in this case would be + # shrinking the pool size as, very likely, addr2line is crashed + # due to low memory (and the respawned one will die again soon). + + try: + lines = self._out_queue.get(block=True, timeout=0.25) + except Queue.Empty: + # On timeout (1/4 s.) repeat the inner loop and check if either the + # addr2line process did crash or we waited its output for too long. + continue + + # In nominal conditions, we get straight to this point. + self._ProcessSymbolOutput(lines) + return + + # If this point is reached, we waited more than |addr2line_timeout|. + logging.warning('Hung addr2line process, respawning (lib: %s).' % + self._lib_file_name) + self._RestartAddr2LineProcess() + + def ProcessAllResolvedSymbolsInQueue(self): + """Consumes all the addr2line output lines produced (without blocking).""" + if not self.queue_size: + return + while True: + try: + lines = self._out_queue.get_nowait() + except Queue.Empty: + break + self._ProcessSymbolOutput(lines) + + def RecycleIfNecessary(self): + """Restarts the process if it has been used for too long. + + A long running addr2line process will consume excessive amounts + of memory without any gain in performance.""" + if self._processed_symbols_count >= ADDR2LINE_RECYCLE_LIMIT: + self._RestartAddr2LineProcess() + + + def Terminate(self): + """Kills the underlying addr2line process. + + The poller |_thread| will terminate as well due to the broken pipe.""" + try: + self._proc.kill() + self._proc.communicate() # Essentially wait() without risking deadlock. + except Exception: # An exception while terminating? How interesting. + pass + self._proc = None + + def _WriteToA2lStdin(self, addr): + self._proc.stdin.write('%s\n' % hex(addr)) + if self._symbolizer.inlines: + # In the case of inlines we output an extra blank line, which causes + # addr2line to emit a (??,??:0) tuple that we use as a boundary marker. 
+ self._proc.stdin.write('\n') + self._proc.stdin.flush() + + def _ProcessSymbolOutput(self, lines): + """Parses an addr2line symbol output and triggers the client callback.""" + (_, callback_arg, _) = self._request_queue.popleft() + self.queue_size -= 1 + + innermost_sym_info = None + sym_info = None + for (line1, line2) in lines: + prev_sym_info = sym_info + name = line1 if not line1.startswith('?') else None + source_path = None + source_line = None + m = ELFSymbolizer.Addr2Line.SYM_ADDR_RE.match(line2) + if m: + if not m.group(1).startswith('?'): + source_path = m.group(1) + if not m.group(2).startswith('?'): + source_line = int(m.group(2)) + else: + logging.warning('Got invalid symbol path from addr2line: %s' % line2) + + # In case disambiguation is on, and needed + was_ambiguous = False + disambiguated = False + if self._symbolizer.disambiguate: + if source_path and not posixpath.isabs(source_path): + path = self._symbolizer.disambiguation_table.get(source_path) + was_ambiguous = True + disambiguated = path is not None + source_path = path if disambiguated else source_path + + # Use absolute paths (so that paths are consistent, as disambiguation + # uses absolute paths) + if source_path and not was_ambiguous: + source_path = os.path.abspath(source_path) + + if source_path and self._symbolizer.strip_base_path: + # Strip the base path + source_path = re.sub('^' + self._symbolizer.strip_base_path, + self._symbolizer.source_root_path or '', source_path) + + sym_info = ELFSymbolInfo(name, source_path, source_line, was_ambiguous, + disambiguated, + self._symbolizer.prefix_to_remove) + if prev_sym_info: + prev_sym_info.inlined_by = sym_info + if not innermost_sym_info: + innermost_sym_info = sym_info + + self._processed_symbols_count += 1 + self._symbolizer.callback(innermost_sym_info, callback_arg) + + def _RestartAddr2LineProcess(self): + if self._proc: + self.Terminate() + + # The only reason of existence of this Queue (and the corresponding + # Thread below) is the lack of a subprocess.stdout.poll_avail_lines(). + # Essentially this is a pipe able to extract a couple of lines atomically. + self._out_queue = Queue.Queue() + + # Start the underlying addr2line process in line buffered mode. + + cmd = [self._symbolizer.addr2line_path, '--functions', '--demangle', + '--exe=' + self._symbolizer.elf_file_path] + if self._symbolizer.inlines: + cmd += ['--inlines'] + self._proc = subprocess.Popen(cmd, bufsize=1, stdout=subprocess.PIPE, + stdin=subprocess.PIPE, stderr=sys.stderr, close_fds=True) + + # Start the poller thread, which simply moves atomically the lines read + # from the addr2line's stdout to the |_out_queue|. + self._thread = threading.Thread( + target=ELFSymbolizer.Addr2Line.StdoutReaderThread, + args=(self._proc.stdout, self._out_queue, self._symbolizer.inlines)) + self._thread.daemon = True # Don't prevent early process exit. + self._thread.start() + + self._processed_symbols_count = 0 + + # Replay the pending requests on the new process (only for the case + # of a hung addr2line timing out during the game). + for (addr, _, _) in self._request_queue: + self._WriteToA2lStdin(addr) + + @staticmethod + def StdoutReaderThread(process_pipe, queue, inlines): + """The poller thread fn, which moves the addr2line stdout to the |queue|. + + This is the only piece of code not running on the main thread. It merely + writes to a Queue, which is thread-safe. 
In the case of inlines, it + detects the ??,??:0 marker and sends the lines atomically, such that the + main thread always receives all the lines corresponding to one symbol in + one shot.""" + try: + lines_for_one_symbol = [] + while True: + line1 = process_pipe.readline().rstrip('\r\n') + line2 = process_pipe.readline().rstrip('\r\n') + if not line1 or not line2: + break + inline_has_more_lines = inlines and (len(lines_for_one_symbol) == 0 or + (line1 != '??' and line2 != '??:0')) + if not inlines or inline_has_more_lines: + lines_for_one_symbol += [(line1, line2)] + if inline_has_more_lines: + continue + queue.put(lines_for_one_symbol) + lines_for_one_symbol = [] + process_pipe.close() + + # Every addr2line processes will die at some point, please die silently. + except (IOError, OSError): + pass + + @property + def first_request_id(self): + """Returns the request_id of the oldest pending request in the queue.""" + return self._request_queue[0][2] if self._request_queue else 0 + + +class ELFSymbolInfo(object): + """The result of the symbolization passed as first arg. of each callback.""" + + def __init__(self, name, source_path, source_line, was_ambiguous=False, + disambiguated=False, prefix_to_remove=None): + """All the fields here can be None (if addr2line replies with '??').""" + self.name = name + if source_path and source_path.startswith(prefix_to_remove): + source_path = source_path[len(prefix_to_remove) : ] + self.source_path = source_path + self.source_line = source_line + # In the case of |inlines|=True, the |inlined_by| points to the outer + # function inlining the current one (and so on, to form a chain). + self.inlined_by = None + self.disambiguated = disambiguated + self.was_ambiguous = was_ambiguous + + def __str__(self): + return '%s [%s:%d]' % ( + self.name or '??', self.source_path or '??', self.source_line or 0) diff --git a/infra/bots/recipe_modules/core/resources/generate_and_upload_doxygen.py b/infra/bots/recipe_modules/core/resources/generate_and_upload_doxygen.py new file mode 100755 index 0000000000..f06ea96bc7 --- /dev/null +++ b/infra/bots/recipe_modules/core/resources/generate_and_upload_doxygen.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ + +"""Generate Doxygen documentation.""" + + +import datetime +import os +import shutil +import subprocess +import sys + + +DOXYFILE_BASENAME = 'Doxyfile' # must match name of Doxyfile in skia root +DOXYGEN_BINARY = 'doxygen' +WORKDIR = os.path.join(os.pardir, 'doxygen_workdir') +DOXYGEN_CONFIG_DIR = os.path.join(WORKDIR, 'doxygen-config') +DOXYGEN_WORKING_DIR = os.path.join(WORKDIR, 'doxygen') +DOXYGEN_GS_PATH = '/'.join(['gs://chromium-skia-gm', 'doxygen']) + +IFRAME_FOOTER_TEMPLATE = """ +<html><body><address style="text-align: right;"><small> +Generated at %s for skia +by <a href="http://www.doxygen.org/index.html">doxygen</a> +%s </small></address></body></html> +""" + + +def recreate_dir(path): + """Delete and recreate the directory.""" + try: + shutil.rmtree(path) + except OSError: + if os.path.exists(path): + raise Exception('Could not remove %s' % path) + os.makedirs(path) + + +def generate_and_upload_doxygen(gsutil_path): + """Generate Doxygen.""" + # Create empty dir and add static_footer.txt + recreate_dir(DOXYGEN_WORKING_DIR) + static_footer_path = os.path.join(DOXYGEN_WORKING_DIR, 'static_footer.txt') + shutil.copyfile(os.path.join('tools', 'doxygen_footer.txt'), + static_footer_path) + + # Make copy of doxygen config file, overriding any necessary configs, + # and run doxygen. + recreate_dir(DOXYGEN_CONFIG_DIR) + modified_doxyfile = os.path.join(DOXYGEN_CONFIG_DIR, DOXYFILE_BASENAME) + with open(DOXYFILE_BASENAME, 'r') as reader: + with open(modified_doxyfile, 'w') as writer: + shutil.copyfileobj(reader, writer) + writer.write('OUTPUT_DIRECTORY = %s\n' % DOXYGEN_WORKING_DIR) + writer.write('HTML_FOOTER = %s\n' % static_footer_path) + subprocess.check_call([DOXYGEN_BINARY, modified_doxyfile]) + + # Create iframe_footer.html + with open(os.path.join(DOXYGEN_WORKING_DIR, 'iframe_footer.html'), 'w') as f: + f.write(IFRAME_FOOTER_TEMPLATE % ( + datetime.datetime.now().isoformat(' '), + subprocess.check_output([DOXYGEN_BINARY, '--version']).rstrip())) + + # Upload. + cmd = [gsutil_path, 'cp', '-a', 'public-read', '-R', + DOXYGEN_WORKING_DIR, DOXYGEN_GS_PATH] + subprocess.check_call(cmd) + + +if '__main__' == __name__: + generate_and_upload_doxygen(*sys.argv[1:]) + diff --git a/infra/bots/recipe_modules/core/resources/run_binary_size_analysis.py b/infra/bots/recipe_modules/core/resources/run_binary_size_analysis.py new file mode 100755 index 0000000000..5cb24d967a --- /dev/null +++ b/infra/bots/recipe_modules/core/resources/run_binary_size_analysis.py @@ -0,0 +1,817 @@ +#!/usr/bin/env python +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Generate a spatial analysis against an arbitrary library. + +Adapted for Skia's use case from +chromium/src/tools/binary_size/run_binary_size_analysis.py. Main changes: + +-- Cleans up some deprecated codes. +-- Always use relative code path so the tree root is Skia repo's root. +-- Instead of outputting the standalone HTML/CSS/JS filesets, writes the + TreeMap JSON data into a Google Storage bucket. +-- Adds githash and total_size to the JSON data. +-- Outputs another summary data in JSON Bench format for skiaperf ingestion. 
+ +The output JSON data for visualization is in the following format: + +{ + "githash": 123abc, + "commit_ts": 1234567890, + "total_size": 1234567, + "key": {"source_type": "binary_size"}, + "tree_data": { + "maxDepth": 9, + "k": "p", "children":[ + {"k":"p","children":[ + {"k":"p","children":[ + {"k":"p","lastPathElement":true,"children":[ + {"k":"b","t":"t","children":[ + {"k":"s", "t":"t", "value":4029, + "n":"etc_encode_subblock_helper(unsigned char const*, ...)" + }, + ...... + } +} + +Another JSON file is generated for size summaries to be used in skiaperf. The +JSON format details can be found at: + https://github.com/google/skia/blob/master/bench/ResultsWriter.h#L54 +and: + https://skia.googlesource.com/buildbot/+/master/perf/go/ingester/nanobench.go + +In the binary size case, outputs look like: + +{ + "gitHash": "123abc", + "key": { + "source_type": "binarysize" + } + "results: { + "src_lazy_global_weak_symbol": { + "memory": { + "bytes": 41, + "options": { + "path": "src_lazy", + "symbol": "global_weak_symbol" + } + } + }, + "src_lazy_global_read_only_data": { + "memory": { + "bytes": 13476, + "options": { + "path": "src_lazy", + "symbol": "global_read_only_data" + } + } + }, + ... + } +} + +""" + +import collections +import datetime +import json +import logging +import multiprocessing +import optparse +import os +import re +import shutil +import struct +import subprocess +import sys +import tempfile +import time +import urllib2 + +import binary_size_utils +import elf_symbolizer + +from recipe_engine.types import freeze + +# Node dictionary keys. These are output in json read by the webapp so +# keep them short to save file size. +# Note: If these change, the webapp must also change. +NODE_TYPE_KEY = 'k' +NODE_NAME_KEY = 'n' +NODE_CHILDREN_KEY = 'children' +NODE_SYMBOL_TYPE_KEY = 't' +NODE_SYMBOL_SIZE_KEY = 'value' +NODE_MAX_DEPTH_KEY = 'maxDepth' +NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement' + +# The display name of the bucket where we put symbols without path. +NAME_NO_PATH_BUCKET = '(No Path)' + +# Try to keep data buckets smaller than this to avoid killing the +# graphing lib. +BIG_BUCKET_LIMIT = 3000 + +# Skia addition: relative dir for libskia.so from code base. +LIBSKIA_RELATIVE_PATH = os.path.join('out', 'Release', 'lib') + +# Skia addition: dictionary mapping symbol type code to symbol name. 
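+# Most of the one-letter codes are nm(1) symbol-type codes; '@' is assigned
+# by this script to vtable entries.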
+# See +# https://code.google.com/p/chromium/codesearch#chromium/src/tools/binary_size/template/D3SymbolTreeMap.js&l=74 +SYMBOL_MAP = freeze({ + 'A': 'global_absolute', + 'B': 'global_uninitialized_data', + 'b': 'local_uninitialized_data', + 'C': 'global_uninitialized_common', + 'D': 'global_initialized_data', + 'd': 'local_initialized_data', + 'G': 'global_small initialized_data', + 'g': 'local_small_initialized_data', + 'i': 'indirect_function', + 'N': 'debugging', + 'p': 'stack_unwind', + 'R': 'global_read_only_data', + 'r': 'local_read_only_data', + 'S': 'global_small_uninitialized_data', + 's': 'local_small_uninitialized_data', + 'T': 'global_code', + 't': 'local_code', + 'U': 'undefined', + 'u': 'unique', + 'V': 'global_weak_object', + 'v': 'local_weak_object', + 'W': 'global_weak_symbol', + 'w': 'local_weak_symbol', + '@': 'vtable_entry', + '-': 'stabs_debugging', + '?': 'unrecognized', +}) + + +def _MkChild(node, name): + child = node[NODE_CHILDREN_KEY].get(name) + if child is None: + child = {NODE_NAME_KEY: name, + NODE_CHILDREN_KEY: {}} + node[NODE_CHILDREN_KEY][name] = child + return child + + +def SplitNoPathBucket(node): + """NAME_NO_PATH_BUCKET can be too large for the graphing lib to + handle. Split it into sub-buckets in that case.""" + root_children = node[NODE_CHILDREN_KEY] + if NAME_NO_PATH_BUCKET in root_children: + no_path_bucket = root_children[NAME_NO_PATH_BUCKET] + old_children = no_path_bucket[NODE_CHILDREN_KEY] + count = 0 + for symbol_type, symbol_bucket in old_children.iteritems(): + count += len(symbol_bucket[NODE_CHILDREN_KEY]) + if count > BIG_BUCKET_LIMIT: + new_children = {} + no_path_bucket[NODE_CHILDREN_KEY] = new_children + current_bucket = None + index = 0 + for symbol_type, symbol_bucket in old_children.iteritems(): + for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].iteritems(): + if index % BIG_BUCKET_LIMIT == 0: + group_no = (index / BIG_BUCKET_LIMIT) + 1 + current_bucket = _MkChild(no_path_bucket, + '%s subgroup %d' % (NAME_NO_PATH_BUCKET, + group_no)) + assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p' + node[NODE_TYPE_KEY] = 'p' # p for path + index += 1 + symbol_size = value[NODE_SYMBOL_SIZE_KEY] + AddSymbolIntoFileNode(current_bucket, symbol_type, + symbol_name, symbol_size) + + +def MakeChildrenDictsIntoLists(node): + largest_list_len = 0 + if NODE_CHILDREN_KEY in node: + largest_list_len = len(node[NODE_CHILDREN_KEY]) + child_list = [] + for child in node[NODE_CHILDREN_KEY].itervalues(): + child_largest_list_len = MakeChildrenDictsIntoLists(child) + if child_largest_list_len > largest_list_len: + largest_list_len = child_largest_list_len + child_list.append(child) + node[NODE_CHILDREN_KEY] = child_list + + return largest_list_len + + +def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size): + """Puts symbol into the file path node |node|. + Returns the number of added levels in tree. I.e. returns 2.""" + + # 'node' is the file node and first step is to find its symbol-type bucket. + node[NODE_LAST_PATH_ELEMENT_KEY] = True + node = _MkChild(node, symbol_type) + assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b' + node[NODE_SYMBOL_TYPE_KEY] = symbol_type + node[NODE_TYPE_KEY] = 'b' # b for bucket + + # 'node' is now the symbol-type bucket. Make the child entry. + node = _MkChild(node, symbol_name) + if NODE_CHILDREN_KEY in node: + if node[NODE_CHILDREN_KEY]: + logging.warning('A container node used as symbol for %s.' % symbol_name) + # This is going to be used as a leaf so no use for child list. 
+ del node[NODE_CHILDREN_KEY] + node[NODE_SYMBOL_SIZE_KEY] = symbol_size + node[NODE_SYMBOL_TYPE_KEY] = symbol_type + node[NODE_TYPE_KEY] = 's' # s for symbol + + return 2 # Depth of the added subtree. + + +def MakeCompactTree(symbols, symbol_path_origin_dir): + result = {NODE_NAME_KEY: '/', + NODE_CHILDREN_KEY: {}, + NODE_TYPE_KEY: 'p', + NODE_MAX_DEPTH_KEY: 0} + seen_symbol_with_path = False + for symbol_name, symbol_type, symbol_size, file_path in symbols: + + if 'vtable for ' in symbol_name: + symbol_type = '@' # hack to categorize these separately + if file_path and file_path != "??": + seen_symbol_with_path = True + else: + file_path = NAME_NO_PATH_BUCKET + + path_parts = file_path.split('/') + + # Find pre-existing node in tree, or update if it already exists + node = result + depth = 0 + while len(path_parts) > 0: + path_part = path_parts.pop(0) + if len(path_part) == 0: + continue + depth += 1 + node = _MkChild(node, path_part) + assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p' + node[NODE_TYPE_KEY] = 'p' # p for path + + depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size) + result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth) + + if not seen_symbol_with_path: + logging.warning('Symbols lack paths. Data will not be structured.') + + # The (no path) bucket can be extremely large if we failed to get + # path information. Split it into subgroups if needed. + SplitNoPathBucket(result) + + largest_list_len = MakeChildrenDictsIntoLists(result) + + if largest_list_len > BIG_BUCKET_LIMIT: + logging.warning('There are sections with %d nodes. ' + 'Results might be unusable.' % largest_list_len) + return result + + +# Skia added: summarizes tree size by symbol type for the given root node. +# Returns a dict keyed by symbol type, and value the type's overall size. +# e.g., {"t": 12345, "W": 543}. +def GetTreeSizes(node): + if 'children' not in node or not node['children']: + return {node['t']: node['value']} + dic = {} + for i in node['children']: + for k, v in GetTreeSizes(i).items(): + dic.setdefault(k, 0) + dic[k] += v + + return dic + + +# Skia added: creates dict to be converted to JSON in bench format. +# See top of file for the structure description. +def GetBenchDict(githash, tree_root): + dic = {'gitHash': githash, + 'key': {'source_type': 'binarysize'}, + 'results': {},} + for i in tree_root['children']: + if '(No Path)' == i['n']: # Already at symbol summary level. + for k, v in GetTreeSizes(i).items(): + dic['results']['no_path_' + SYMBOL_MAP[k]] = { + 'memory': { + 'bytes': v, + 'options': {'path': 'no_path', + 'symbol': SYMBOL_MAP[k],},}} + else: # We need to go deeper. + for c in i['children']: + path = i['n'] + '_' + c['n'] + for k, v in GetTreeSizes(c).items(): + dic['results'][path + '_' + SYMBOL_MAP[k]] = { + 'memory': { + 'bytes': v, + 'options': {'path': path, + 'symbol': SYMBOL_MAP[k],}}} + + return dic + + +# Skia added: constructs 'gsutil cp' subprocess command list. 
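+# For example, GetGsCopyCommandList('gsutil', 'a.json', 'gs://bucket/a.json')
+# returns ['gsutil', '-h', 'Content-Type:application/json', 'cp', '-a',
+# 'public-read', 'a.json', 'gs://bucket/a.json'].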
+def GetGsCopyCommandList(gsutil, src, dst): + return [gsutil, '-h', 'Content-Type:application/json', 'cp', '-a', + 'public-read', src, dst] + + +def DumpCompactTree(symbols, symbol_path_origin_dir, ha, ts, issue, gsutil): + tree_root = MakeCompactTree(symbols, symbol_path_origin_dir) + json_data = {'tree_data': tree_root, + 'githash': ha, + 'commit_ts': ts, + 'key': {'source_type': 'binary_size'}, + 'total_size': sum(GetTreeSizes(tree_root).values()),} + tmpfile = tempfile.NamedTemporaryFile(delete=False).name + with open(tmpfile, 'w') as out: + # Use separators without whitespace to get a smaller file. + json.dump(json_data, out, separators=(',', ':')) + + GS_PREFIX = 'gs://chromium-skia-gm/' + # Writes to Google Storage for visualization. + subprocess.check_call(GetGsCopyCommandList( + gsutil, tmpfile, GS_PREFIX + 'size/' + ha + '.json')) + # Updates the latest data. + if not issue: + subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile, + GS_PREFIX + 'size/latest.json')) + # Writes an extra copy using year/month/day/hour path for easy ingestion. + with open(tmpfile, 'w') as out: + json.dump(GetBenchDict(ha, tree_root), out, separators=(',', ':')) + now = datetime.datetime.utcnow() + ingest_path = '/'.join(('nano-json-v1', str(now.year).zfill(4), + str(now.month).zfill(2), str(now.day).zfill(2), + str(now.hour).zfill(2))) + if issue: + ingest_path = '/'.join('trybot', ingest_path, issue) + subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile, + GS_PREFIX + ingest_path + '/binarysize_' + ha + '.json')) + + +def MakeSourceMap(symbols): + sources = {} + for _sym, _symbol_type, size, path in symbols: + key = None + if path: + key = os.path.normpath(path) + else: + key = '[no path]' + if key not in sources: + sources[key] = {'path': path, 'symbol_count': 0, 'size': 0} + record = sources[key] + record['size'] += size + record['symbol_count'] += 1 + return sources + + +# Regex for parsing "nm" output. A sample line looks like this: +# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95 +# +# The fields are: address, size, type, name, source location +# Regular expression explained ( see also: https://xkcd.com/208 ): +# ([0-9a-f]{8,}+) The address +# [\s]+ Whitespace separator +# ([0-9a-f]{8,}+) The size. From here on out it's all optional. +# [\s]+ Whitespace separator +# (\S?) The symbol type, which is any non-whitespace char +# [\s*] Whitespace separator +# ([^\t]*) Symbol name, any non-tab character (spaces ok!) +# [\t]? Tab separator +# (.*) The location (filename[:linennum|?][ (discriminator n)] +sNmPattern = re.compile( + r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)') + +class Progress(): + def __init__(self): + self.count = 0 + self.skip_count = 0 + self.collisions = 0 + self.time_last_output = time.time() + self.count_last_output = 0 + self.disambiguations = 0 + self.was_ambiguous = 0 + + +def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs, + disambiguate, src_path): + nm_output = RunNm(library, nm_binary) + nm_output_lines = nm_output.splitlines() + nm_output_lines_len = len(nm_output_lines) + address_symbol = {} + progress = Progress() + def map_address_symbol(symbol, addr): + progress.count += 1 + if addr in address_symbol: + # 'Collision between %s and %s.' 
% (str(symbol.name), + # str(address_symbol[addr].name)) + progress.collisions += 1 + else: + if symbol.disambiguated: + progress.disambiguations += 1 + if symbol.was_ambiguous: + progress.was_ambiguous += 1 + + address_symbol[addr] = symbol + + progress_output() + + def progress_output(): + progress_chunk = 100 + if progress.count % progress_chunk == 0: + time_now = time.time() + time_spent = time_now - progress.time_last_output + if time_spent > 1.0: + # Only output at most once per second. + progress.time_last_output = time_now + chunk_size = progress.count - progress.count_last_output + progress.count_last_output = progress.count + if time_spent > 0: + speed = chunk_size / time_spent + else: + speed = 0 + progress_percent = (100.0 * (progress.count + progress.skip_count) / + nm_output_lines_len) + disambiguation_percent = 0 + if progress.disambiguations != 0: + disambiguation_percent = (100.0 * progress.disambiguations / + progress.was_ambiguous) + + sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, ' + '%d disambiguations where %.1f%% succeeded)' + ' - %.1f lookups/s.' % + (progress_percent, progress.count, progress.collisions, + progress.disambiguations, disambiguation_percent, speed)) + + # In case disambiguation was disabled, we remove the source path (which upon + # being set signals the symbolizer to enable disambiguation) + if not disambiguate: + src_path = None + symbol_path_origin_dir = os.path.dirname(library) + # Skia specific. + symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '') + symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary, + map_address_symbol, + max_concurrent_jobs=jobs, + source_root_path=src_path, + prefix_to_remove=symbol_path_prefix) + user_interrupted = False + try: + for line in nm_output_lines: + match = sNmPattern.match(line) + if match: + location = match.group(5) + if not location: + addr = int(match.group(1), 16) + size = int(match.group(2), 16) + if addr in address_symbol: # Already looked up, shortcut + # ELFSymbolizer. + map_address_symbol(address_symbol[addr], addr) + continue + elif size == 0: + # Save time by not looking up empty symbols (do they even exist?) + print('Empty symbol: ' + line) + else: + symbolizer.SymbolizeAsync(addr, addr) + continue + + progress.skip_count += 1 + except KeyboardInterrupt: + user_interrupted = True + print('Interrupting - killing subprocesses. Please wait.') + + try: + symbolizer.Join() + except KeyboardInterrupt: + # Don't want to abort here since we will be finished in a few seconds. + user_interrupted = True + print('Patience you must have my young padawan.') + + print '' + + if user_interrupted: + print('Skipping the rest of the file mapping. ' + 'Output will not be fully classified.') + + symbol_path_origin_dir = os.path.dirname(library) + # Skia specific: path prefix to strip. + symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '') + + with open(outfile, 'w') as out: + for line in nm_output_lines: + match = sNmPattern.match(line) + if match: + location = match.group(5) + if not location: + addr = int(match.group(1), 16) + symbol = address_symbol.get(addr) + if symbol is not None: + path = '??' + if symbol.source_path is not None: + path = symbol.source_path.replace(symbol_path_prefix, '') + line_number = 0 + if symbol.source_line is not None: + line_number = symbol.source_line + out.write('%s\t%s:%d\n' % (line, path, line_number)) + continue + + out.write('%s\n' % line) + + print('%d symbols in the results.' 
% len(address_symbol)) + + +def RunNm(binary, nm_binary): + cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort', + binary] + nm_process = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + (process_output, err_output) = nm_process.communicate() + + if nm_process.returncode != 0: + if err_output: + raise Exception, err_output + else: + raise Exception, process_output + + return process_output + + +def GetNmSymbols(nm_infile, outfile, library, jobs, verbose, + addr2line_binary, nm_binary, disambiguate, src_path): + if nm_infile is None: + if outfile is None: + outfile = tempfile.NamedTemporaryFile(delete=False).name + + if verbose: + print 'Running parallel addr2line, dumping symbols to ' + outfile + RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs, + disambiguate, src_path) + + nm_infile = outfile + + elif verbose: + print 'Using nm input from ' + nm_infile + with file(nm_infile, 'r') as infile: + return list(binary_size_utils.ParseNm(infile)) + + +PAK_RESOURCE_ID_TO_STRING = { "inited": False } + +def LoadPakIdsFromResourceFile(filename): + """Given a file name, it loads everything that looks like a resource id + into PAK_RESOURCE_ID_TO_STRING.""" + with open(filename) as resource_header: + for line in resource_header: + if line.startswith("#define "): + line_data = line.split() + if len(line_data) == 3: + try: + resource_number = int(line_data[2]) + resource_name = line_data[1] + PAK_RESOURCE_ID_TO_STRING[resource_number] = resource_name + except ValueError: + pass + +def GetReadablePakResourceName(pak_file, resource_id): + """Pak resources have a numeric identifier. It is not helpful when + trying to locate where footprint is generated. This does its best to + map the number to a usable string.""" + if not PAK_RESOURCE_ID_TO_STRING['inited']: + # Try to find resource header files generated by grit when + # building the pak file. We'll look for files named *resources.h" + # and lines of the type: + # #define MY_RESOURCE_JS 1234 + PAK_RESOURCE_ID_TO_STRING['inited'] = True + gen_dir = os.path.join(os.path.dirname(pak_file), 'gen') + if os.path.isdir(gen_dir): + for dirname, _dirs, files in os.walk(gen_dir): + for filename in files: + if filename.endswith('resources.h'): + LoadPakIdsFromResourceFile(os.path.join(dirname, filename)) + return PAK_RESOURCE_ID_TO_STRING.get(resource_id, + 'Pak Resource %d' % resource_id) + +def AddPakData(symbols, pak_file): + """Adds pseudo-symbols from a pak file.""" + pak_file = os.path.abspath(pak_file) + with open(pak_file, 'rb') as pak: + data = pak.read() + + PAK_FILE_VERSION = 4 + HEADER_LENGTH = 2 * 4 + 1 # Two uint32s. (file version, number of entries) + # and one uint8 (encoding of text resources) + INDEX_ENTRY_SIZE = 2 + 4 # Each entry is a uint16 and a uint32. + version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH]) + assert version == PAK_FILE_VERSION, ('Unsupported pak file ' + 'version (%d) in %s. Only ' + 'support version %d' % + (version, pak_file, PAK_FILE_VERSION)) + if num_entries > 0: + # Read the index and data. + data = data[HEADER_LENGTH:] + for _ in range(num_entries): + resource_id, offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE]) + data = data[INDEX_ENTRY_SIZE:] + _next_id, next_offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE]) + resource_size = next_offset - offset + + symbol_name = GetReadablePakResourceName(pak_file, resource_id) + symbol_path = pak_file + symbol_type = 'd' # Data. Approximation. 
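+      # resource_size above is the gap to the next index entry; the pak
+      # index stores one extra terminator entry, so that peek is valid even
+      # for the last resource.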
+ symbol_size = resource_size + symbols.append((symbol_name, symbol_type, symbol_size, symbol_path)) + +def _find_in_system_path(binary): + """Locate the full path to binary in the system path or return None + if not found.""" + system_path = os.environ["PATH"].split(os.pathsep) + for path in system_path: + binary_path = os.path.join(path, binary) + if os.path.isfile(binary_path): + return binary_path + return None + +def CheckDebugFormatSupport(library, addr2line_binary): + """Kills the program if debug data is in an unsupported format. + + There are two common versions of the DWARF debug formats and + since we are right now transitioning from DWARF2 to newer formats, + it's possible to have a mix of tools that are not compatible. Detect + that and abort rather than produce meaningless output.""" + tool_output = subprocess.check_output([addr2line_binary, '--version']) + version_re = re.compile(r'^GNU [^ ]+ .* (\d+).(\d+).*?$', re.M) + parsed_output = version_re.match(tool_output) + major = int(parsed_output.group(1)) + minor = int(parsed_output.group(2)) + supports_dwarf4 = major > 2 or major == 2 and minor > 22 + + if supports_dwarf4: + return + + print('Checking version of debug information in %s.' % library) + debug_info = subprocess.check_output(['readelf', '--debug-dump=info', + '--dwarf-depth=1', library]) + dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M) + parsed_dwarf_format_output = dwarf_version_re.search(debug_info) + version = int(parsed_dwarf_format_output.group(1)) + if version > 2: + print('The supplied tools only support DWARF2 debug data but the binary\n' + + 'uses DWARF%d. Update the tools or compile the binary\n' % version + + 'with -gdwarf-2.') + sys.exit(1) + + +def main(): + usage = """%prog [options] + + Runs a spatial analysis on a given library, looking up the source locations + of its symbols and calculating how much space each directory, source file, + and so on is taking. The result is a report that can be used to pinpoint + sources of large portions of the binary, etceteras. + + Under normal circumstances, you only need to pass two arguments, thusly: + + %prog --library /path/to/library --destdir /path/to/output + + In this mode, the program will dump the symbols from the specified library + and map those symbols back to source locations, producing a web-based + report in the specified output directory. + + Other options are available via '--help'. + """ + parser = optparse.OptionParser(usage=usage) + parser.add_option('--nm-in', metavar='PATH', + help='if specified, use nm input from <path> instead of ' + 'generating it. Note that source locations should be ' + 'present in the file; i.e., no addr2line symbol lookups ' + 'will be performed when this option is specified. ' + 'Mutually exclusive with --library.') + parser.add_option('--destdir', metavar='PATH', + help='write output to the specified directory. An HTML ' + 'report is generated here along with supporting files; ' + 'any existing report will be overwritten. Not used in ' + 'Skia.') + parser.add_option('--library', metavar='PATH', + help='if specified, process symbols in the library at ' + 'the specified path. Mutually exclusive with --nm-in.') + parser.add_option('--pak', metavar='PATH', + help='if specified, includes the contents of the ' + 'specified *.pak file in the output.') + parser.add_option('--nm-binary', + help='use the specified nm binary to analyze library. 
' + 'This is to be used when the nm in the path is not for ' + 'the right architecture or of the right version.') + parser.add_option('--addr2line-binary', + help='use the specified addr2line binary to analyze ' + 'library. This is to be used when the addr2line in ' + 'the path is not for the right architecture or ' + 'of the right version.') + parser.add_option('--jobs', type='int', + help='number of jobs to use for the parallel ' + 'addr2line processing pool; defaults to 1. More ' + 'jobs greatly improve throughput but eat RAM like ' + 'popcorn, and take several gigabytes each. Start low ' + 'and ramp this number up until your machine begins to ' + 'struggle with RAM. ' + 'This argument is only valid when using --library.') + parser.add_option('-v', dest='verbose', action='store_true', + help='be verbose, printing lots of status information.') + parser.add_option('--nm-out', metavar='PATH', + help='keep the nm output file, and store it at the ' + 'specified path. This is useful if you want to see the ' + 'fully processed nm output after the symbols have been ' + 'mapped to source locations. By default, a tempfile is ' + 'used and is deleted when the program terminates.' + 'This argument is only valid when using --library.') + parser.add_option('--legacy', action='store_true', + help='emit legacy binary size report instead of modern') + parser.add_option('--disable-disambiguation', action='store_true', + help='disables the disambiguation process altogether,' + ' NOTE: this may, depending on your toolchain, produce' + ' output with some symbols at the top layer if addr2line' + ' could not get the entire source path.') + parser.add_option('--source-path', default='./', + help='the path to the source code of the output binary, ' + 'default set to current directory. Used in the' + ' disambiguation process.') + parser.add_option('--githash', default='latest', + help='Git hash for the binary version. Added by Skia.') + parser.add_option('--commit_ts', type='int', default=-1, + help='Timestamp for the commit. Added by Skia.') + parser.add_option('--issue_number', default='', + help='The trybot issue number in string. Added by Skia.') + parser.add_option('--gsutil_path', default='gsutil', + help='Path to gsutil binary. Added by Skia.') + opts, _args = parser.parse_args() + + if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in): + parser.error('exactly one of --library or --nm-in is required') + if (opts.nm_in): + if opts.jobs: + print >> sys.stderr, ('WARNING: --jobs has no effect ' + 'when used with --nm-in') + if not opts.jobs: + # Use the number of processors but cap between 2 and 4 since raw + # CPU power isn't the limiting factor. It's I/O limited, memory + # bus limited and available-memory-limited. Too many processes and + # the computer will run out of memory and it will be slow. + opts.jobs = max(2, min(4, str(multiprocessing.cpu_count()))) + + if opts.addr2line_binary: + assert os.path.isfile(opts.addr2line_binary) + addr2line_binary = opts.addr2line_binary + else: + addr2line_binary = _find_in_system_path('addr2line') + assert addr2line_binary, 'Unable to find addr2line in the path. '\ + 'Use --addr2line-binary to specify location.' + + if opts.nm_binary: + assert os.path.isfile(opts.nm_binary) + nm_binary = opts.nm_binary + else: + nm_binary = _find_in_system_path('nm') + assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\ + 'to specify location.' 
+ + if opts.pak: + assert os.path.isfile(opts.pak), 'Could not find ' % opts.pak + + print('addr2line: %s' % addr2line_binary) + print('nm: %s' % nm_binary) + + if opts.library: + CheckDebugFormatSupport(opts.library, addr2line_binary) + + symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, + opts.jobs, opts.verbose is True, + addr2line_binary, nm_binary, + opts.disable_disambiguation is None, + opts.source_path) + + if opts.pak: + AddPakData(symbols, opts.pak) + + if opts.legacy: # legacy report + print 'Do Not set legacy flag.' + + else: # modern report + if opts.library: + symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library)) + else: + # Just a guess. Hopefully all paths in the input file are absolute. + symbol_path_origin_dir = os.path.abspath(os.getcwd()) + DumpCompactTree(symbols, symbol_path_origin_dir, opts.githash, + opts.commit_ts, opts.issue_number, opts.gsutil_path) + print 'Report data uploaded to GS.' + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/infra/bots/recipe_modules/core/resources/upload_bench_results.py b/infra/bots/recipe_modules/core/resources/upload_bench_results.py new file mode 100755 index 0000000000..25cfcc2631 --- /dev/null +++ b/infra/bots/recipe_modules/core/resources/upload_bench_results.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +""" Upload benchmark performance data results. """ + +import gzip +import os +import os.path +import re +import subprocess +import sys +import tempfile + +from datetime import datetime + + +def _UploadJSONResults(builder_name, build_number, dest_gsbase, gs_subdir, + full_json_path, gzipped=True, gsutil_path='gsutil', + issue_number=None): + now = datetime.utcnow() + gs_json_path = '/'.join((str(now.year).zfill(4), str(now.month).zfill(2), + str(now.day).zfill(2), str(now.hour).zfill(2))) + gs_dir = '/'.join((gs_subdir, gs_json_path, builder_name)) + if builder_name.endswith('-Trybot'): + if not issue_number: + raise Exception('issue_number build property is missing!') + gs_dir = '/'.join(('trybot', gs_dir, build_number, issue_number)) + full_path_to_upload = full_json_path + file_to_upload = os.path.basename(full_path_to_upload) + http_header = ['Content-Type:application/json'] + if gzipped: + http_header.append('Content-Encoding:gzip') + gzipped_file = os.path.join(tempfile.gettempdir(), file_to_upload) + # Apply gzip. 
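+    # Content-Encoding:gzip was added to the HTTP headers above, so Google
+    # Storage serves the object in a form HTTP clients decompress
+    # transparently.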
+ with open(full_path_to_upload, 'rb') as f_in: + with gzip.open(gzipped_file, 'wb') as f_out: + f_out.writelines(f_in) + full_path_to_upload = gzipped_file + cmd = ['python', gsutil_path] + for header in http_header: + cmd.extend(['-h', header]) + cmd.extend(['cp', '-a', 'public-read', full_path_to_upload, + '/'.join((dest_gsbase, gs_dir, file_to_upload))]) + print ' '.join(cmd) + subprocess.check_call(cmd) + + +def main(builder_name, build_number, perf_data_dir, got_revision, gsutil_path, + issue_number=None): + """Uploads gzipped nanobench JSON data.""" + # Find the nanobench JSON + file_list = os.listdir(perf_data_dir) + RE_FILE_SEARCH = re.compile( + 'nanobench_({})_[0-9]+\.json'.format(got_revision)) + nanobench_name = None + + for file_name in file_list: + if RE_FILE_SEARCH.search(file_name): + nanobench_name = file_name + break + + if nanobench_name: + dest_gsbase = 'gs://skia-perf' + nanobench_json_file = os.path.join(perf_data_dir, + nanobench_name) + _UploadJSONResults(builder_name, build_number, dest_gsbase, 'nano-json-v1', + nanobench_json_file, gsutil_path=gsutil_path, + issue_number=issue_number) + + +if __name__ == '__main__': + main(*sys.argv[1:]) + diff --git a/infra/bots/recipe_modules/core/resources/upload_dm_results.py b/infra/bots/recipe_modules/core/resources/upload_dm_results.py new file mode 100755 index 0000000000..1bee64fb78 --- /dev/null +++ b/infra/bots/recipe_modules/core/resources/upload_dm_results.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Upload DM output PNG files and JSON summary to Google Storage.""" + +import datetime +import json +import os +import shutil +import sys +import tempfile + +def main(dm_dir, git_hash, builder_name, build_number, try_issue, import_path): + """Upload DM output PNG files and JSON summary to Google Storage. + + dm_dir: path to PNG files and JSON summary (str) + git_hash: this build's Git hash (str) + builder_name: name of this builder (str) + build_number: nth build on this builder (str or int) + try_issue: Rietveld issue if this is a try job (str, int, or None) + import_path: Path to import the gs_utils package (str) + """ + # import gs_utils + sys.path.insert(0, import_path) + import gs_utils + + # Private, but Google-readable. + ACL = gs_utils.GSUtils.PredefinedACL.PRIVATE + FINE_ACLS = [( + gs_utils.GSUtils.IdType.GROUP_BY_DOMAIN, + 'google.com', + gs_utils.GSUtils.Permission.READ + )] + + # Move dm.json and verbose.log to their own directory for easy upload. + tmp = tempfile.mkdtemp() + shutil.move(os.path.join(dm_dir, 'dm.json'), + os.path.join(tmp, 'dm.json')) + shutil.move(os.path.join(dm_dir, 'verbose.log'), + os.path.join(tmp, 'verbose.log')) + + # Make sure the JSON file parses correctly. + json_file_name = os.path.join(tmp, 'dm.json') + with open(json_file_name) as jsonFile: + try: + json.load(jsonFile) + except ValueError: + json_content = open(json_file_name).read() + print >> sys.stderr, "Invalid JSON: \n\n%s\n" % json_content + raise + + # Only images are left in dm_dir. Upload any new ones. 
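+  # dm.json and verbose.log were moved into |tmp| above, so a recursive
+  # upload of dm_dir now covers images only.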
+ gs = gs_utils.GSUtils() + bucket, image_dest_dir = 'chromium-skia-gm', 'dm-images-v1' + print 'Uploading images to gs://' + bucket + '/' + image_dest_dir + gs.upload_dir_contents(dm_dir, + bucket, + image_dest_dir, + upload_if = gs.UploadIf.ALWAYS, + predefined_acl = ACL, + fine_grained_acl_list = FINE_ACLS) + + + # /dm-json-v1/year/month/day/hour/git-hash/builder/build-number/dm.json + now = datetime.datetime.utcnow() + summary_dest_dir = '/'.join(['dm-json-v1', + str(now.year ).zfill(4), + str(now.month).zfill(2), + str(now.day ).zfill(2), + str(now.hour ).zfill(2), + git_hash, + builder_name, + str(build_number)]) + + # Trybot results are further siloed by CL. + if try_issue: + summary_dest_dir = '/'.join(['trybot', summary_dest_dir, str(try_issue)]) + + # Upload the JSON summary and verbose.log. + print 'Uploading logs to gs://' + bucket + '/' + summary_dest_dir + gs.upload_dir_contents(tmp, + bucket, + summary_dest_dir, + predefined_acl = ACL, + fine_grained_acl_list = FINE_ACLS) + + + # Just for hygiene, put dm.json and verbose.log back. + shutil.move(os.path.join(tmp, 'dm.json'), + os.path.join(dm_dir, 'dm.json')) + shutil.move(os.path.join(tmp, 'verbose.log'), + os.path.join(dm_dir, 'verbose.log')) + os.rmdir(tmp) + +if '__main__' == __name__: + main(*sys.argv[1:]) |
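For orientation, here is a minimal, hypothetical sketch of driving the asynchronous ELFSymbolizer interface added in elf_symbolizer.py above: SymbolizeAsync() enqueues an address, the callback receives each result (possibly out of order), and Join() drains the batch. The library path, addr2line path, and addresses are placeholders.

```python
import elf_symbolizer

def on_symbol(sym_info, addr):
    # sym_info is an ELFSymbolInfo; addr is the callback_arg passed below.
    print '0x%x -> %s' % (addr, sym_info)

symbolizer = elf_symbolizer.ELFSymbolizer(
    'out/Release/lib/libskia.so',  # placeholder ELF path
    '/usr/bin/addr2line',          # placeholder addr2line path
    on_symbol,
    max_concurrent_jobs=2,
    prefix_to_remove='')           # keep source paths untouched
for addr in (0x1234, 0x5678):
    symbolizer.SymbolizeAsync(addr, callback_arg=addr)
symbolizer.Join()  # Returns once every callback has fired.
```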